1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Instruction suffix for sign and zero extensions.
23 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
25 ;; 16 byte integral modes handled by SSE
26 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
28 ;; All 16-byte vector modes handled by SSE
29 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Same list as SSEMODE with the single-element V1TI mode added.
30 (define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
32 ;; 32 byte integral vector modes handled by AVX
33 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
35 ;; All 32-byte vector modes handled by AVX
36 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
38 ;; All QI vector modes handled by AVX
39 (define_mode_iterator AVXMODEQI [V32QI V16QI])
41 ;; All DI vector modes handled by AVX
42 (define_mode_iterator AVXMODEDI [V4DI V2DI])
44 ;; All vector modes handled by AVX
45 (define_mode_iterator AVXMODE
46 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Same list as AVXMODE with V1TI added.
47 (define_mode_iterator AVXMODE16
48 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; In the SSEMODE<digits> iterators each digit is an element size in
;; bytes (1 = QI, 2 = HI, 4 = SI, 8 = DI); every mode is 16 bytes wide.
51 (define_mode_iterator SSEMODE12 [V16QI V8HI])
52 (define_mode_iterator SSEMODE24 [V8HI V4SI])
53 (define_mode_iterator SSEMODE14 [V16QI V4SI])
54 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
55 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
56 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Scalar and 16-byte packed floating-point modes.
57 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
;; 32-byte packed floating-point modes.
58 (define_mode_iterator FMA4MODEF4 [V8SF V4DF])
;; 16-byte packed floating-point modes.
59 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
;; 32-byte packed floating-point modes.
61 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
;; 32-byte modes: the float modes plus V8SI.
62 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
;; 32-byte modes with 32-bit or 64-bit elements, integer and float.
63 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
;; 32-byte modes with four 64-bit elements.
64 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
;; 32-byte modes with eight 32-bit elements.
65 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
;; All packed floating-point modes, 16 and 32 bytes.
66 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
;; Four-element floating-point modes.
67 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
;; Double-precision packed modes.
68 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
;; Single-precision packed modes.
69 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
;; NOTE(review): the names suggest these are the modes used by the
;; cvtdq2ps / cvtps2dq patterns; those patterns are not visible here.
70 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
71 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
73 ;; Int-float size matches
74 (define_mode_iterator SSEMODE4S [V4SF V4SI])
75 (define_mode_iterator SSEMODE2D [V2DF V2DI])
77 ;; Modes handled by integer vcond pattern
;; V2DI is only included when TARGET_SSE4_2 holds.
78 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
79 (V2DI "TARGET_SSE4_2")])
81 ;; Modes handled by vec_extract_even/odd pattern.
;; Each visible entry is conditional: the 16-byte integer modes on
;; TARGET_SSE2, the 32-byte float modes on TARGET_AVX.
82 (define_mode_iterator SSEMODE_EO
85 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
86 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
87 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
89 ;; Mapping from float mode to required SSE level
90 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
92 ;; Mapping from integer vector mode to mnemonic suffix
93 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
95 ;; Mapping of the insn mnemonic suffix
;; Note that the 32-byte integer modes V8SI and V4DI map to the float
;; suffixes "ps" and "pd".
96 (define_mode_attr ssemodesuffix
97 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd") (V8SF "ps") (V4DF "pd")
98 (V8SI "ps") (V4DI "pd")])
;; Scalar variant of the mnemonic suffix: each float vector mode maps to
;; the suffix of its element type ("ss" or "sd").
99 (define_mode_attr ssescalarmodesuffix
100 [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd") (V8SF "ss") (V4DF "sd")
103 ;; Mapping of the max integer size for xop rotate immediate constraint
;; The value is the element width in bits minus one.
104 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
106 ;; Mapping of vector modes back to the scalar modes
107 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
108 (V16QI "QI") (V8HI "HI")
109 (V4SI "SI") (V2DI "DI")])
111 ;; Mapping of vector modes to a vector mode of double size
112 (define_mode_attr ssedoublesizemode
113 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
114 (V8HI "V16HI") (V16QI "V32QI")
115 (V4DF "V8DF") (V8SF "V16SF")
116 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
118 ;; Number of scalar elements in each vector type
119 (define_mode_attr ssescalarnum
120 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
121 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Value used for the insn "mode" attribute: integer vector modes map to
;; TI (16 bytes) or OI (32 bytes), float vector modes map to themselves.
124 (define_mode_attr avxvecmode
125 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
126 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
127 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
;; Single-precision float vector mode of the same size as the given
;; integer vector mode.
128 (define_mode_attr avxvecpsmode
129 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
130 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; Vector mode with the same element type and half as many elements.
131 (define_mode_attr avxhalfvecmode
132 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
133 (V8SF "V4SF") (V4DF "V2DF")
134 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
;; Element (scalar) mode of each vector mode.
135 (define_mode_attr avxscalarmode
136 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
137 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
;; Swaps between the SF and SI vector modes of the same element count.
138 (define_mode_attr avxcvtvecmode
139 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; Integer vector mode with the same element size and count as the given
;; float vector mode.
140 (define_mode_attr avxpermvecmode
141 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; Mnemonic suffix that also distinguishes integer modes ("si").
142 (define_mode_attr avxmodesuffixp
143 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
;; Pattern-name suffix: empty for 16-byte modes, "256" for 32-byte modes.
145 (define_mode_attr avxmodesuffix
146 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
147 (V8SI "256") (V8SF "256") (V4DF "256")])
149 ;; Mapping of immediate bits for blend instructions
;; Each value is 2^N - 1 for a mode with N elements.
150 (define_mode_attr blendbits
151 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
153 ;; Mapping of immediate bits for pinsr instructions
;; Each value is 2^(N-1) for a mode with N elements.
154 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
156 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
158 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Move patterns
162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Move expander for the 32-byte AVX vector modes (AVX256MODE).  The
;; expansion is delegated to ix86_expand_vector_move.
164 (define_expand "mov<mode>"
165 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
166 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
169 ix86_expand_vector_move (<MODE>mode, operands);
;; AVX vector move over every mode in AVXMODE16.  At least one operand
;; must be a register.  The first constraint alternative takes an SSE
;; constant ("C") and is emitted via standard_sse_constant_opcode; the
;; other two are a load/register copy and a store.  The mnemonic
;; (vmovaps, vmovapd or vmovdqa) is chosen from the insn's "mode"
;; attribute, and vmovaps is used instead of the other two when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.
173 (define_insn "*avx_mov<mode>_internal"
174 [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
175 (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
177 && (register_operand (operands[0], <MODE>mode)
178 || register_operand (operands[1], <MODE>mode))"
180 switch (which_alternative)
183 return standard_sse_constant_opcode (insn, operands[1]);
186 switch (get_attr_mode (insn))
190 return "vmovaps\t{%1, %0|%0, %1}";
193 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
194 return "vmovaps\t{%1, %0|%0, %1}";
196 return "vmovapd\t{%1, %0|%0, %1}";
198 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
199 return "vmovaps\t{%1, %0|%0, %1}";
201 return "vmovdqa\t{%1, %0|%0, %1}";
207 [(set_attr "type" "sselog1,ssemov,ssemov")
208 (set_attr "prefix" "vex")
209 (set_attr "mode" "<avxvecmode>")])
211 ;; All of these patterns are enabled for SSE1 as well as SSE2.
212 ;; This is essential for maintaining stable calling conventions.
;; Move expander for the 16-byte SSE vector modes (SSEMODE16).  The
;; expansion is delegated to ix86_expand_vector_move.
214 (define_expand "mov<mode>"
215 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
216 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
219 ix86_expand_vector_move (<MODE>mode, operands);
;; SSE vector move over every mode in SSEMODE16.  At least one operand
;; must be a register.  The first constraint alternative takes an SSE
;; constant ("C") and is emitted via standard_sse_constant_opcode; the
;; others emit movaps, movapd or movdqa according to the "mode"
;; attribute, with movaps substituted when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.
;;
;; The "mode" attribute is V4SF when optimizing the function for size,
;; when TARGET_SSE2 is off, or for the store alternative when
;; TARGET_SSE_TYPELESS_STORES is set.  Otherwise it is V4SF or V2DF for
;; those two modes and TI for everything else.
223 (define_insn "*mov<mode>_internal"
224 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
225 (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
227 && (register_operand (operands[0], <MODE>mode)
228 || register_operand (operands[1], <MODE>mode))"
230 switch (which_alternative)
233 return standard_sse_constant_opcode (insn, operands[1]);
236 switch (get_attr_mode (insn))
239 return "movaps\t{%1, %0|%0, %1}";
241 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
242 return "movaps\t{%1, %0|%0, %1}";
244 return "movapd\t{%1, %0|%0, %1}";
246 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
247 return "movaps\t{%1, %0|%0, %1}";
249 return "movdqa\t{%1, %0|%0, %1}";
255 [(set_attr "type" "sselog1,ssemov,ssemov")
257 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
258 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
259 (and (eq_attr "alternative" "2")
260 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
262 (const_string "V4SF")
263 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
264 (const_string "V4SF")
265 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
266 (const_string "V2DF")
268 (const_string "TI")))])
270 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
271 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
272 ;; from memory, we'd prefer to load the memory directly into the %xmm
273 ;; register. To facilitate this happy circumstance, this pattern won't
274 ;; split until after register allocation. If the 64-bit value didn't
275 ;; come from memory, this is the best we can do. This is much better
276 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; from there.
;;
;; Enabled only for 32-bit targets with SSE2 and inter-unit moves.
;; After reload the split handles two cases.  A register source is
;; assembled from its two SImode halves: one gen_sse2_loadld into
;; operand 0, one into the scratch operand 2, then
;; gen_vec_interleave_lowv4si.  A memory source is loaded with
;; gen_vec_concatv2di, using const0_rtx as the second half.
279 (define_insn_and_split "movdi_to_sse"
281 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
282 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
283 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
284 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
286 "&& reload_completed"
289 if (register_operand (operands[1], DImode))
291 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
292 Assemble the 64-bit DImode value in an xmm register. */
293 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
294 gen_rtx_SUBREG (SImode, operands[1], 0)));
295 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
296 gen_rtx_SUBREG (SImode, operands[1], 4)));
297 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
300 else if (memory_operand (operands[1], DImode))
301 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
302 operands[1], const0_rtx));
;; After reload, rewrite a V4SF load whose source satisfies
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to its SFmode
;; subreg at offset 0 and duplicated; operand 2 is the V4SF zero vector.
308 [(set (match_operand:V4SF 0 "register_operand" "")
309 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
310 "TARGET_SSE && reload_completed"
313 (vec_duplicate:V4SF (match_dup 1))
317 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
318 operands[2] = CONST0_RTX (V4SFmode);
;; After reload, rewrite a V2DF load whose source satisfies
;; zero_extended_scalar_load_operand as a vec_concat of the DFmode subreg
;; of operand 1 (offset 0) with a DFmode zero.
322 [(set (match_operand:V2DF 0 "register_operand" "")
323 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
324 "TARGET_SSE2 && reload_completed"
325 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
327 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
328 operands[2] = CONST0_RTX (DFmode);
;; Push expander for a 32-byte AVX vector register; delegated to
;; ix86_expand_push.
331 (define_expand "push<mode>1"
332 [(match_operand:AVX256MODE 0 "register_operand" "")]
335 ix86_expand_push (<MODE>mode, operands[0]);
;; Push expander for a 16-byte SSE vector register; delegated to
;; ix86_expand_push.
339 (define_expand "push<mode>1"
340 [(match_operand:SSEMODE16 0 "register_operand" "")]
343 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned move expander for the 32-byte AVX vector modes; delegated
;; to ix86_expand_vector_move_misalign.
347 (define_expand "movmisalign<mode>"
348 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
349 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
352 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Misaligned move expander for the 16-byte SSE vector modes; delegated
;; to ix86_expand_vector_move_misalign.
356 (define_expand "movmisalign<mode>"
357 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
358 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
361 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; AVX unaligned packed-float move (vmovups / vmovupd) for 16- and 32-byte
;; float modes.  Either a load/register copy or a store; memory-to-memory
;; is rejected by the insn condition.
365 (define_insn "avx_movu<ssemodesuffix><avxmodesuffix>"
366 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
368 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
370 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
371 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
372 "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
373 [(set_attr "type" "ssemov")
374 (set_attr "movu" "1")
375 (set_attr "prefix" "vex")
376 (set_attr "mode" "<MODE>")])
;; movq between xmm registers or from memory.  The visible RTL selects
;; element 0 of the V2DI source.  The "%v" in the template together with
;; the maybe_vex prefix attribute lets the same pattern serve both the
;; legacy and VEX encodings.
378 (define_insn "sse2_movq128"
379 [(set (match_operand:V2DI 0 "register_operand" "=x")
382 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
383 (parallel [(const_int 0)]))
386 "%vmovq\t{%1, %0|%0, %1}"
387 [(set_attr "type" "ssemov")
388 (set_attr "prefix" "maybe_vex")
389 (set_attr "mode" "TI")])
;; SSE unaligned packed-float move (movups / movupd) for V4SF and V2DF.
;; Either a load/register copy or a store; memory-to-memory is rejected
;; by the insn condition.
391 (define_insn "<sse>_movu<ssemodesuffix>"
392 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
394 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
396 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
397 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
398 "movu<ssemodesuffix>\t{%1, %0|%0, %1}"
399 [(set_attr "type" "ssemov")
400 (set_attr "movu" "1")
401 (set_attr "mode" "<MODE>")])
;; AVX unaligned integer vector move (vmovdqu) for V16QI and V32QI.
;; Memory-to-memory is rejected by the insn condition.
403 (define_insn "avx_movdqu<avxmodesuffix>"
404 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
406 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
408 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
409 "vmovdqu\t{%1, %0|%0, %1}"
410 [(set_attr "type" "ssemov")
411 (set_attr "movu" "1")
412 (set_attr "prefix" "vex")
413 (set_attr "mode" "<avxvecmode>")])
;; SSE2 unaligned integer vector move (movdqu) for V16QI.
;; Memory-to-memory is rejected by the insn condition.
415 (define_insn "sse2_movdqu"
416 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
417 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
419 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
420 "movdqu\t{%1, %0|%0, %1}"
421 [(set_attr "type" "ssemov")
422 (set_attr "movu" "1")
423 (set_attr "prefix_data16" "1")
424 (set_attr "mode" "TI")])
;; AVX store of a packed-float register to memory using vmovntps /
;; vmovntpd.
426 (define_insn "avx_movnt<mode>"
427 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
429 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
431 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
432 "vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
433 [(set_attr "type" "ssemov")
434 (set_attr "prefix" "vex")
435 (set_attr "mode" "<MODE>")])
;; SSE store of a packed-float register to memory using movntps /
;; movntpd.
437 (define_insn "<sse>_movnt<mode>"
438 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
440 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
442 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
443 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
444 [(set_attr "type" "ssemov")
445 (set_attr "mode" "<MODE>")])
;; AVX store of a V2DI / V4DI register to memory using vmovntdq.
;; NOTE(review): the type here is "ssecvt" while sse2_movntv2di uses
;; "ssemov" for the equivalent store -- confirm this is intended.
447 (define_insn "avx_movnt<mode>"
448 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
450 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
453 "vmovntdq\t{%1, %0|%0, %1}"
454 [(set_attr "type" "ssecvt")
455 (set_attr "prefix" "vex")
456 (set_attr "mode" "<avxvecmode>")])
;; SSE2 store of a V2DI register to memory using movntdq.
458 (define_insn "sse2_movntv2di"
459 [(set (match_operand:V2DI 0 "memory_operand" "=m")
460 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
463 "movntdq\t{%1, %0|%0, %1}"
464 [(set_attr "type" "ssemov")
465 (set_attr "prefix_data16" "1")
466 (set_attr "mode" "TI")])
;; Store of a 32-bit general register ("r") to memory using movnti.
;; The "mode" attribute is SI because the insn moves an SImode value from
;; a general register; it was previously given as V2DF, which does not
;; describe this instruction.  The attribute only feeds insn metadata
;; (scheduling and length computation), not the emitted assembly.
468 (define_insn "sse2_movntsi"
469 [(set (match_operand:SI 0 "memory_operand" "=m")
470 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
473 "movnti\t{%1, %0|%0, %1}"
474 [(set_attr "type" "ssemov")
475 (set_attr "prefix_data16" "0")
476 (set_attr "mode" "SI")])
;; AVX unaligned integer vector load from memory using vlddqu, for V16QI
;; and V32QI.
;; NOTE(review): the type here is "ssecvt" while sse3_lddqu uses
;; "ssemov" for the equivalent load -- confirm this is intended.
478 (define_insn "avx_lddqu<avxmodesuffix>"
479 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
481 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
484 "vlddqu\t{%1, %0|%0, %1}"
485 [(set_attr "type" "ssecvt")
486 (set_attr "movu" "1")
487 (set_attr "prefix" "vex")
488 (set_attr "mode" "<avxvecmode>")])
;; Unaligned V16QI load from memory using lddqu.  The attributes record
;; that the encoding uses a rep prefix and no data16 prefix.
490 (define_insn "sse3_lddqu"
491 [(set (match_operand:V16QI 0 "register_operand" "=x")
492 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
495 "lddqu\t{%1, %0|%0, %1}"
496 [(set_attr "type" "ssemov")
497 (set_attr "movu" "1")
498 (set_attr "prefix_data16" "0")
499 (set_attr "prefix_rep" "1")
500 (set_attr "mode" "TI")])
502 ; Expand patterns for non-temporal stores. At the moment, only those
503 ; that directly map to insns are defined; it would be possible to
504 ; define patterns for other modes that would expand to several insns.
;; Every expander below stores a register operand to a memory operand.

;; Packed float vector modes (V4SF, V2DF).
506 (define_expand "storent<mode>"
507 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
509 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
511 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)")
;; Modes of the MODEF iterator, which is defined outside this section.
513 (define_expand "storent<mode>"
514 [(set (match_operand:MODEF 0 "memory_operand" "")
516 [(match_operand:MODEF 1 "register_operand" "")]
;; V2DI vector.
520 (define_expand "storentv2di"
521 [(set (match_operand:V2DI 0 "memory_operand" "")
522 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
;; SImode scalar.
526 (define_expand "storentsi"
527 [(set (match_operand:SI 0 "memory_operand" "")
528 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
532 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
534 ;; Parallel floating point arithmetic
536 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander for V4SF / V2DF; all work is done by
;; ix86_expand_fp_absneg_operator, which receives the rtx code.
538 (define_expand "<code><mode>2"
539 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
541 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
542 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
543 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Add/subtract expander for the 32-byte packed float modes.  Operands
;; are passed through ix86_fixup_binary_operands_no_copy and the template
;; is then emitted as written.
545 (define_expand "<plusminus_insn><mode>3"
546 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
547 (plusminus:AVX256MODEF2P
548 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
549 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
550 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
551 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX packed float add/subtract in three-operand form: the destination
;; is independent of both sources.  Covers 16- and 32-byte float modes.
;; The <comm> prefix on operand 1's constraint comes from a code
;; attribute defined outside this section.
553 (define_insn "*avx_<plusminus_insn><mode>3"
554 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
555 (plusminus:AVXMODEF2P
556 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
557 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
558 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
559 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
560 "v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
561 [(set_attr "type" "sseadd")
562 (set_attr "prefix" "vex")
563 (set_attr "mode" "<avxvecmode>")])
;; Add/subtract expander for the 16-byte packed float modes.  Operands
;; are passed through ix86_fixup_binary_operands_no_copy and the template
;; is then emitted as written.
565 (define_expand "<plusminus_insn><mode>3"
566 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
567 (plusminus:SSEMODEF2P
568 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
569 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
570 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
571 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; SSE packed float add/subtract in two-operand form: operand 1 is tied
;; to the destination (constraint "0"), so only %0 and %2 are printed.
573 (define_insn "*<plusminus_insn><mode>3"
574 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
575 (plusminus:SSEMODEF2P
576 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
577 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
578 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
579 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
580 "<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}"
581 [(set_attr "type" "sseadd")
582 (set_attr "mode" "<MODE>")])
;; AVX add/subtract with the scalar mnemonic suffix (ss / sd),
;; three-operand form.  The operation is wrapped in a vec_merge over the
;; 16-byte float modes.
584 (define_insn "*avx_vm<plusminus_insn><mode>3"
585 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
586 (vec_merge:SSEMODEF2P
587 (plusminus:SSEMODEF2P
588 (match_operand:SSEMODEF2P 1 "register_operand" "x")
589 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
592 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
593 "v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
594 [(set_attr "type" "sseadd")
595 (set_attr "prefix" "vex")
596 (set_attr "mode" "<ssescalarmode>")])
;; SSE add/subtract with the scalar mnemonic suffix (ss / sd), two-operand
;; form with operand 1 tied to the destination.  The operation is wrapped
;; in a vec_merge.
598 (define_insn "<sse>_vm<plusminus_insn><mode>3"
599 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
600 (vec_merge:SSEMODEF2P
601 (plusminus:SSEMODEF2P
602 (match_operand:SSEMODEF2P 1 "register_operand" "0")
603 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
606 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
607 "<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
608 [(set_attr "type" "sseadd")
609 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for the 32-byte packed float modes; operands are
;; passed through ix86_fixup_binary_operands_no_copy with code MULT.
611 (define_expand "mul<mode>3"
612 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
614 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
615 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
616 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
617 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; AVX packed float multiply (vmulps / vmulpd), three-operand form.
;; Operand 1 is marked commutative with operand 2 ("%").
619 (define_insn "*avx_mul<mode>3"
620 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
622 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
623 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
624 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
625 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
626 "vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
627 [(set_attr "type" "ssemul")
628 (set_attr "prefix" "vex")
629 (set_attr "mode" "<avxvecmode>")])
;; Multiply expander for the 16-byte packed float modes; operands are
;; passed through ix86_fixup_binary_operands_no_copy with code MULT.
631 (define_expand "mul<mode>3"
632 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
634 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
635 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
636 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
637 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; SSE packed float multiply (mulps / mulpd), two-operand form: operand 1
;; is tied to the destination and marked commutative ("%0").
639 (define_insn "*mul<mode>3"
640 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
642 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
643 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
644 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
645 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
646 "mul<ssemodesuffix>\t{%2, %0|%0, %2}"
647 [(set_attr "type" "ssemul")
648 (set_attr "mode" "<MODE>")])
;; AVX multiply with the scalar mnemonic suffix (vmulss / vmulsd),
;; three-operand form, wrapped in a vec_merge.
;; NOTE(review): the condition uses AVX_VEC_FLOAT_MODE_P whereas the
;; sibling *avx_vm add/sub and div patterns use AVX128_VEC_FLOAT_MODE_P;
;; SSEMODEF2P only contains 16-byte modes, so confirm whether the
;; difference matters.
650 (define_insn "*avx_vmmul<mode>3"
651 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
652 (vec_merge:SSEMODEF2P
654 (match_operand:SSEMODEF2P 1 "register_operand" "x")
655 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
658 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
659 "vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
660 [(set_attr "type" "ssemul")
661 (set_attr "prefix" "vex")
662 (set_attr "mode" "<ssescalarmode>")])
;; SSE multiply with the scalar mnemonic suffix (mulss / mulsd),
;; two-operand form with operand 1 tied to the destination, wrapped in a
;; vec_merge.
664 (define_insn "<sse>_vmmul<mode>3"
665 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
666 (vec_merge:SSEMODEF2P
668 (match_operand:SSEMODEF2P 1 "register_operand" "0")
669 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
672 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
673 "mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
674 [(set_attr "type" "ssemul")
675 (set_attr "mode" "<ssescalarmode>")])
;; V8SF divide expander.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy.  ix86_emit_swdivsf is then called
;; instead of emitting the plain divide when all of these hold:
;; TARGET_SSE_MATH, TARGET_RECIP, not optimizing the insn for size,
;; -ffinite-math-only, no trapping math, and unsafe math optimizations.
;; NOTE(review): presumably a reciprocal-based software divide; the
;; helper is defined elsewhere.
677 (define_expand "divv8sf3"
678 [(set (match_operand:V8SF 0 "register_operand" "")
679 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
680 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
683 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
685 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
686 && flag_finite_math_only && !flag_trapping_math
687 && flag_unsafe_math_optimizations)
689 ix86_emit_swdivsf (operands[0], operands[1],
690 operands[2], V8SFmode);
;; V4DF divide expander; only fixes up the operands via
;; ix86_fixup_binary_operands_no_copy.
695 (define_expand "divv4df3"
696 [(set (match_operand:V4DF 0 "register_operand" "")
697 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
698 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
700 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
;; AVX packed float divide (vdivps / vdivpd), three-operand form, for
;; 16- and 32-byte float modes.  Operand 1 must be a register.
702 (define_insn "avx_div<mode>3"
703 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
705 (match_operand:AVXMODEF2P 1 "register_operand" "x")
706 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
707 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
708 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
709 [(set_attr "type" "ssediv")
710 (set_attr "prefix" "vex")
711 (set_attr "mode" "<MODE>")])
;; V4SF divide expander.  ix86_emit_swdivsf is called instead of emitting
;; the plain divide when all of these hold: TARGET_SSE_MATH, TARGET_RECIP,
;; optimizing the insn for speed, -ffinite-math-only, no trapping math,
;; and unsafe math optimizations.
713 (define_expand "divv4sf3"
714 [(set (match_operand:V4SF 0 "register_operand" "")
715 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
716 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
719 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
720 && flag_finite_math_only && !flag_trapping_math
721 && flag_unsafe_math_optimizations)
723 ix86_emit_swdivsf (operands[0], operands[1],
724 operands[2], V4SFmode);
;; V2DF divide expander; no preparation code is visible here, so the
;; template is emitted as written.
729 (define_expand "divv2df3"
730 [(set (match_operand:V2DF 0 "register_operand" "")
731 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
732 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
;; AVX encoding of the 16-byte packed float divide (vdivps / vdivpd),
;; three-operand form.
735 (define_insn "*avx_div<mode>3"
736 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
738 (match_operand:SSEMODEF2P 1 "register_operand" "x")
739 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
740 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
741 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
742 [(set_attr "type" "ssediv")
743 (set_attr "prefix" "vex")
744 (set_attr "mode" "<MODE>")])
;; SSE packed float divide (divps / divpd), two-operand form with
;; operand 1 tied to the destination.
746 (define_insn "<sse>_div<mode>3"
747 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
749 (match_operand:SSEMODEF2P 1 "register_operand" "0")
750 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
751 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
752 "div<ssemodesuffix>\t{%2, %0|%0, %2}"
753 [(set_attr "type" "ssediv")
754 (set_attr "mode" "<MODE>")])
;; AVX divide with the scalar mnemonic suffix (vdivss / vdivsd),
;; three-operand form, wrapped in a vec_merge.
756 (define_insn "*avx_vmdiv<mode>3"
757 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
758 (vec_merge:SSEMODEF2P
760 (match_operand:SSEMODEF2P 1 "register_operand" "x")
761 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
764 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
765 "vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
766 [(set_attr "type" "ssediv")
767 (set_attr "prefix" "vex")
768 (set_attr "mode" "<ssescalarmode>")])
;; SSE divide with the scalar mnemonic suffix (divss / divsd), two-operand
;; form with operand 1 tied to the destination, wrapped in a vec_merge.
770 (define_insn "<sse>_vmdiv<mode>3"
771 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
772 (vec_merge:SSEMODEF2P
774 (match_operand:SSEMODEF2P 1 "register_operand" "0")
775 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
778 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
779 "div<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
780 [(set_attr "type" "ssediv")
781 (set_attr "mode" "<ssescalarmode>")])
;; V8SF reciprocal (vrcpps), represented as UNSPEC_RCP of operand 1.
783 (define_insn "avx_rcpv8sf2"
784 [(set (match_operand:V8SF 0 "register_operand" "=x")
786 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
788 "vrcpps\t{%1, %0|%0, %1}"
789 [(set_attr "type" "sse")
790 (set_attr "prefix" "vex")
791 (set_attr "mode" "V8SF")])
;; V4SF reciprocal (rcpps), represented as UNSPEC_RCP of operand 1.  The
;; "%v" template prefix with the maybe_vex attribute covers both the
;; legacy and VEX encodings.
793 (define_insn "sse_rcpv4sf2"
794 [(set (match_operand:V4SF 0 "register_operand" "=x")
796 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
798 "%vrcpps\t{%1, %0|%0, %1}"
799 [(set_attr "type" "sse")
800 (set_attr "atom_sse_attr" "rcp")
801 (set_attr "prefix" "maybe_vex")
802 (set_attr "mode" "V4SF")])
;; AVX scalar reciprocal (vrcpss), three-operand form.  Operand 1 is the
;; value operated on; operand 2 is the second register source and is
;; printed between the source and the destination.
804 (define_insn "*avx_vmrcpv4sf2"
805 [(set (match_operand:V4SF 0 "register_operand" "=x")
807 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
809 (match_operand:V4SF 2 "register_operand" "x")
812 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
813 [(set_attr "type" "sse")
814 (set_attr "prefix" "vex")
815 (set_attr "mode" "SF")])
;; SSE scalar reciprocal (rcpss), two-operand form.  Operand 2 is tied to
;; the destination (constraint "0") and is not printed.
817 (define_insn "sse_vmrcpv4sf2"
818 [(set (match_operand:V4SF 0 "register_operand" "=x")
820 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
822 (match_operand:V4SF 2 "register_operand" "0")
825 "rcpss\t{%1, %0|%0, %1}"
826 [(set_attr "type" "sse")
827 (set_attr "atom_sse_attr" "rcp")
828 (set_attr "mode" "SF")])
;; V8SF square-root expander.  ix86_emit_swsqrtsf (last argument 0) is
;; called instead of emitting the plain sqrt when all of these hold:
;; TARGET_SSE_MATH, TARGET_RECIP, not optimizing the insn for size,
;; -ffinite-math-only, no trapping math, and unsafe math optimizations.
830 (define_expand "sqrtv8sf2"
831 [(set (match_operand:V8SF 0 "register_operand" "")
832 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
835 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
836 && flag_finite_math_only && !flag_trapping_math
837 && flag_unsafe_math_optimizations)
839 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
;; V8SF packed square root (vsqrtps).
844 (define_insn "avx_sqrtv8sf2"
845 [(set (match_operand:V8SF 0 "register_operand" "=x")
846 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
848 "vsqrtps\t{%1, %0|%0, %1}"
849 [(set_attr "type" "sse")
850 (set_attr "prefix" "vex")
851 (set_attr "mode" "V8SF")])
;; V4SF square-root expander.  ix86_emit_swsqrtsf (last argument 0) is
;; called instead of emitting the plain sqrt when all of these hold:
;; TARGET_SSE_MATH, TARGET_RECIP, optimizing the insn for speed,
;; -ffinite-math-only, no trapping math, and unsafe math optimizations.
853 (define_expand "sqrtv4sf2"
854 [(set (match_operand:V4SF 0 "register_operand" "")
855 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
858 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
859 && flag_finite_math_only && !flag_trapping_math
860 && flag_unsafe_math_optimizations)
862 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
;; V4SF packed square root (sqrtps); "%v" with maybe_vex covers both the
;; legacy and VEX encodings.
867 (define_insn "sse_sqrtv4sf2"
868 [(set (match_operand:V4SF 0 "register_operand" "=x")
869 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
871 "%vsqrtps\t{%1, %0|%0, %1}"
872 [(set_attr "type" "sse")
873 (set_attr "atom_sse_attr" "sqrt")
874 (set_attr "prefix" "maybe_vex")
875 (set_attr "mode" "V4SF")])
;; V4DF packed square root (vsqrtpd).
877 (define_insn "sqrtv4df2"
878 [(set (match_operand:V4DF 0 "register_operand" "=x")
879 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
881 "vsqrtpd\t{%1, %0|%0, %1}"
882 [(set_attr "type" "sse")
883 (set_attr "prefix" "vex")
884 (set_attr "mode" "V4DF")])
;; V2DF packed square root (sqrtpd); "%v" with maybe_vex covers both the
;; legacy and VEX encodings.
886 (define_insn "sqrtv2df2"
887 [(set (match_operand:V2DF 0 "register_operand" "=x")
888 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
890 "%vsqrtpd\t{%1, %0|%0, %1}"
891 [(set_attr "type" "sse")
892 (set_attr "prefix" "maybe_vex")
893 (set_attr "mode" "V2DF")])
;; AVX square root with the scalar mnemonic suffix (vsqrtss / vsqrtsd),
;; three-operand form wrapped in a vec_merge.  Operand 1 is the value
;; operated on; operand 2 is the second register source.
;; NOTE(review): the condition uses AVX_VEC_FLOAT_MODE_P although
;; SSEMODEF2P only contains 16-byte modes; other *avx_vm patterns use
;; AVX128_VEC_FLOAT_MODE_P -- confirm whether the difference matters.
895 (define_insn "*avx_vmsqrt<mode>2"
896 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
897 (vec_merge:SSEMODEF2P
899 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
900 (match_operand:SSEMODEF2P 2 "register_operand" "x")
902 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
903 "vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
904 [(set_attr "type" "sse")
905 (set_attr "prefix" "vex")
906 (set_attr "mode" "<ssescalarmode>")])
;; SSE square root with the scalar mnemonic suffix (sqrtss / sqrtsd),
;; two-operand form wrapped in a vec_merge.  Operand 2 is tied to the
;; destination (constraint "0") and is not printed.
908 (define_insn "<sse>_vmsqrt<mode>2"
909 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
910 (vec_merge:SSEMODEF2P
912 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
913 (match_operand:SSEMODEF2P 2 "register_operand" "0")
915 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
916 "sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
917 [(set_attr "type" "sse")
918 (set_attr "atom_sse_attr" "sqrt")
919 (set_attr "mode" "<ssescalarmode>")])
;; V8SF reciprocal-square-root expander (UNSPEC_RSQRT), enabled for
;; TARGET_AVX && TARGET_SSE_MATH.  It calls ix86_emit_swsqrtsf with a last
;; argument of 1, where the sqrt expanders pass 0.
921 (define_expand "rsqrtv8sf2"
922 [(set (match_operand:V8SF 0 "register_operand" "")
924 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
925 "TARGET_AVX && TARGET_SSE_MATH"
927 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
;; V8SF reciprocal square root (vrsqrtps), represented as UNSPEC_RSQRT.
931 (define_insn "avx_rsqrtv8sf2"
932 [(set (match_operand:V8SF 0 "register_operand" "=x")
934 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
936 "vrsqrtps\t{%1, %0|%0, %1}"
937 [(set_attr "type" "sse")
938 (set_attr "prefix" "vex")
939 (set_attr "mode" "V8SF")])
;; V4SF reciprocal-square-root expander (UNSPEC_RSQRT).  It calls
;; ix86_emit_swsqrtsf with a last argument of 1, where the sqrt expanders
;; pass 0.
941 (define_expand "rsqrtv4sf2"
942 [(set (match_operand:V4SF 0 "register_operand" "")
944 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
947 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
;; V4SF reciprocal square root (rsqrtps), represented as UNSPEC_RSQRT;
;; "%v" with maybe_vex covers both the legacy and VEX encodings.
951 (define_insn "sse_rsqrtv4sf2"
952 [(set (match_operand:V4SF 0 "register_operand" "=x")
954 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
956 "%vrsqrtps\t{%1, %0|%0, %1}"
957 [(set_attr "type" "sse")
958 (set_attr "prefix" "maybe_vex")
959 (set_attr "mode" "V4SF")])
;; AVX scalar reciprocal square root (vrsqrtss), three-operand form.
;; Operand 1 is the value operated on; operand 2 is the second register
;; source.
961 (define_insn "*avx_vmrsqrtv4sf2"
962 [(set (match_operand:V4SF 0 "register_operand" "=x")
964 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
966 (match_operand:V4SF 2 "register_operand" "x")
969 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
970 [(set_attr "type" "sse")
971 (set_attr "prefix" "vex")
972 (set_attr "mode" "SF")])
;; SSE scalar reciprocal square root (rsqrtss), two-operand form.
;; Operand 2 is tied to the destination (constraint "0") and is not
;; printed.
974 (define_insn "sse_vmrsqrtv4sf2"
975 [(set (match_operand:V4SF 0 "register_operand" "=x")
977 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
979 (match_operand:V4SF 2 "register_operand" "0")
982 "rsqrtss\t{%1, %0|%0, %1}"
983 [(set_attr "type" "sse")
984 (set_attr "mode" "SF")])
986 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
987 ;; isn't really correct, as those rtl operators aren't defined when
988 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;;
;; smax/smin expander for the 32-byte packed float modes.  Without
;; -ffinite-math-only, operand 1 is forced into a register before the
;; operands go through ix86_fixup_binary_operands_no_copy.
990 (define_expand "<code><mode>3"
991 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
992 (smaxmin:AVX256MODEF2P
993 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
994 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
995 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
997 if (!flag_finite_math_only)
998 operands[1] = force_reg (<MODE>mode, operands[1]);
999 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Expander on the SSEMODEF2P modes, guarded by SSE_VEC_FLOAT_MODE_P.
;; Unless flag_finite_math_only is set, operand 1 is first forced into a
;; register; the operands are then passed through
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL code line is not visible in this excerpt;
;; presumably smaxmin, as in the AVX256 expander -- confirm.
1002 (define_expand "<code><mode>3"
1003 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1005 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1006 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1007 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1009 if (!flag_finite_math_only)
1010 operands[1] = force_reg (<MODE>mode, operands[1]);
1011 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; AVX pattern that only matches when flag_finite_math_only is set and
;; ix86_binary_operator_ok accepts the operands.  Operand 1 carries the
;; commutative marker "%", so operands 1 and 2 may be exchanged.  Emits
;; the three-operand v<maxmin_float><ssemodesuffix> form.
1014 (define_insn "*avx_<code><mode>3_finite"
1015 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1017 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1018 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1019 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1020 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1021 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1022 [(set_attr "type" "sseadd")
1023 (set_attr "prefix" "vex")
1024 (set_attr "mode" "<MODE>")])
;; SSE pattern that only matches when flag_finite_math_only is set and
;; ix86_binary_operator_ok accepts the operands.  Operand 1 is tied to
;; the destination and is marked commutative ("%0").  Emits the
;; two-operand <maxmin_float><ssemodesuffix> form.
1026 (define_insn "*<code><mode>3_finite"
1027 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1029 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1030 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1031 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1032 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1033 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1034 [(set_attr "type" "sseadd")
1035 (set_attr "mode" "<MODE>")])
;; AVX pattern that matches regardless of flag_finite_math_only, so it
;; is the one used when NaNs and signed zeros must be honoured.  Emits
;; the three-operand v<maxmin_float><ssemodesuffix> form.
;;
;; Operand 1 is a plain register operand WITHOUT the commutative marker
;; "%": the hardware min/max instructions are not commutative when an
;; input is NaN or when both inputs are zeros of opposite sign, so
;; letting the register allocator swap operands 1 and 2 would change the
;; result.  This matches the SSE pattern *<code><mode>3, which uses
;; "register_operand" "0" for operand 1.
1037 (define_insn "*avx_<code><mode>3"
1038 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1040 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1041 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1042 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1043 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1044 [(set_attr "type" "sseadd")
1045 (set_attr "prefix" "vex")
1046 (set_attr "mode" "<avxvecmode>")])
;; SSE pattern that matches regardless of flag_finite_math_only.
;; Operand 1 must be a register tied to the destination ("0") and has no
;; commutative marker.  Emits the two-operand
;; <maxmin_float><ssemodesuffix> form.
1048 (define_insn "*<code><mode>3"
1049 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1051 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1052 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1053 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1054 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1055 [(set_attr "type" "sseadd")
1056 (set_attr "mode" "<MODE>")])
;; AVX scalar (vec_merge) form, guarded by AVX128_VEC_FLOAT_MODE_P.
;; Emits the three-operand v<maxmin_float><ssescalarmodesuffix>;
;; operand 1 is a register and operand 2 may be a register or memory.
;; NOTE(review): "type" is "sse" here but "sseadd" in the SSE
;; counterpart <sse>_vm<code><mode>3 -- confirm which is intended.
1058 (define_insn "*avx_vm<code><mode>3"
1059 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1060 (vec_merge:SSEMODEF2P
1062 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1063 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1066 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1067 "v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1068 [(set_attr "type" "sse")
1069 (set_attr "prefix" "vex")
1070 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar (vec_merge) form, guarded by SSE_VEC_FLOAT_MODE_P.
;; Emits the two-operand <maxmin_float><ssescalarmodesuffix>; operand 1
;; is tied to the destination ("0").
1072 (define_insn "<sse>_vm<code><mode>3"
1073 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1074 (vec_merge:SSEMODEF2P
1076 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1077 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1080 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1081 "<maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1082 [(set_attr "type" "sseadd")
1083 (set_attr "mode" "<ssescalarmode>")])
1085 ;; These versions of the min/max patterns implement exactly the operations
1086 ;; min = (op1 < op2 ? op1 : op2)
1087 ;; max = (!(op1 < op2) ? op1 : op2)
1088 ;; Their operands are not commutative, and thus they may be used in the
1089 ;; presence of -0.0 and NaN.
;; AVX form of the non-commutative min: emits vmin<ssemodesuffix> with
;; operand 1 in a register and operand 2 in a register or memory.
;; NOTE(review): the unspec wrapper lines are not visible in this excerpt.
1091 (define_insn "*avx_ieee_smin<mode>3"
1092 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1094 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1095 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1097 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1098 "vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1099 [(set_attr "type" "sseadd")
1100 (set_attr "prefix" "vex")
1101 (set_attr "mode" "<avxvecmode>")])
;; AVX form of the non-commutative max: emits vmax<ssemodesuffix> with
;; operand 1 in a register and operand 2 in a register or memory.
;; NOTE(review): the unspec wrapper lines are not visible in this excerpt.
1103 (define_insn "*avx_ieee_smax<mode>3"
1104 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1106 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1107 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1109 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1110 "vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1111 [(set_attr "type" "sseadd")
1112 (set_attr "prefix" "vex")
1113 (set_attr "mode" "<avxvecmode>")])
;; SSE form of the non-commutative min: emits the two-operand
;; min<ssemodesuffix>, with operand 1 tied to the destination ("0").
1115 (define_insn "*ieee_smin<mode>3"
1116 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1118 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1119 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1121 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1122 "min<ssemodesuffix>\t{%2, %0|%0, %2}"
1123 [(set_attr "type" "sseadd")
1124 (set_attr "mode" "<MODE>")])
;; SSE form of the non-commutative max: emits the two-operand
;; max<ssemodesuffix>, with operand 1 tied to the destination ("0").
1126 (define_insn "*ieee_smax<mode>3"
1127 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1129 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1130 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1132 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1133 "max<ssemodesuffix>\t{%2, %0|%0, %2}"
1134 [(set_attr "type" "sseadd")
1135 (set_attr "mode" "<MODE>")])
;; V8SF add/subtract: emits the three-operand vaddsubps.  The visible
;; RTL includes (minus (match_dup 1) (match_dup 2)) on the same operands
;; as the first arithmetic term.
;; NOTE(review): the merge wrapper, lane mask and condition are not
;; visible in this excerpt.
1137 (define_insn "avx_addsubv8sf3"
1138 [(set (match_operand:V8SF 0 "register_operand" "=x")
1141 (match_operand:V8SF 1 "register_operand" "x")
1142 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1143 (minus:V8SF (match_dup 1) (match_dup 2))
1146 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1147 [(set_attr "type" "sseadd")
1148 (set_attr "prefix" "vex")
1149 (set_attr "mode" "V8SF")])
;; V4DF add/subtract: emits the three-operand vaddsubpd.  The visible
;; RTL includes (minus (match_dup 1) (match_dup 2)).
;; NOTE(review): the merge wrapper, lane mask and condition are not
;; visible in this excerpt.
1151 (define_insn "avx_addsubv4df3"
1152 [(set (match_operand:V4DF 0 "register_operand" "=x")
1155 (match_operand:V4DF 1 "register_operand" "x")
1156 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1157 (minus:V4DF (match_dup 1) (match_dup 2))
1160 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1161 [(set_attr "type" "sseadd")
1162 (set_attr "prefix" "vex")
1163 (set_attr "mode" "V4DF")])
;; V4SF add/subtract, AVX encoding: emits the three-operand vaddsubps
;; with operand 1 in any SSE register ("x").
1165 (define_insn "*avx_addsubv4sf3"
1166 [(set (match_operand:V4SF 0 "register_operand" "=x")
1169 (match_operand:V4SF 1 "register_operand" "x")
1170 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1171 (minus:V4SF (match_dup 1) (match_dup 2))
1174 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1175 [(set_attr "type" "sseadd")
1176 (set_attr "prefix" "vex")
1177 (set_attr "mode" "V4SF")])
;; V4SF add/subtract, legacy encoding: emits the two-operand addsubps
;; with operand 1 tied to the destination ("0").  Sets prefix_rep to 1.
1179 (define_insn "sse3_addsubv4sf3"
1180 [(set (match_operand:V4SF 0 "register_operand" "=x")
1183 (match_operand:V4SF 1 "register_operand" "0")
1184 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1185 (minus:V4SF (match_dup 1) (match_dup 2))
1188 "addsubps\t{%2, %0|%0, %2}"
1189 [(set_attr "type" "sseadd")
1190 (set_attr "prefix_rep" "1")
1191 (set_attr "mode" "V4SF")])
;; V2DF add/subtract, AVX encoding: emits the three-operand vaddsubpd
;; with operand 1 in any SSE register ("x").
1193 (define_insn "*avx_addsubv2df3"
1194 [(set (match_operand:V2DF 0 "register_operand" "=x")
1197 (match_operand:V2DF 1 "register_operand" "x")
1198 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1199 (minus:V2DF (match_dup 1) (match_dup 2))
1202 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1203 [(set_attr "type" "sseadd")
1204 (set_attr "prefix" "vex")
1205 (set_attr "mode" "V2DF")])
;; V2DF add/subtract, legacy encoding: emits the two-operand addsubpd
;; with operand 1 tied to the destination ("0").  Sets atom_unit to
;; "complex".
1207 (define_insn "sse3_addsubv2df3"
1208 [(set (match_operand:V2DF 0 "register_operand" "=x")
1211 (match_operand:V2DF 1 "register_operand" "0")
1212 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1213 (minus:V2DF (match_dup 1) (match_dup 2))
1216 "addsubpd\t{%2, %0|%0, %2}"
1217 [(set_attr "type" "sseadd")
1218 (set_attr "atom_unit" "complex")
1219 (set_attr "mode" "V2DF")])
;; V4DF horizontal add/subtract: emits vh<plusminus_mnemonic>pd.  The
;; visible RTL selects DF elements 0..3 of operand 1 and elements 0..3
;; of operand 2 with vec_select.
;; NOTE(review): the lines that combine the selected elements are not
;; visible in this excerpt.
1221 (define_insn "avx_h<plusminus_insn>v4df3"
1222 [(set (match_operand:V4DF 0 "register_operand" "=x")
1227 (match_operand:V4DF 1 "register_operand" "x")
1228 (parallel [(const_int 0)]))
1229 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1231 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1232 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1236 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1237 (parallel [(const_int 0)]))
1238 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1240 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1241 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1243 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1244 [(set_attr "type" "sseadd")
1245 (set_attr "prefix" "vex")
1246 (set_attr "mode" "V4DF")])
;; V8SF horizontal add/subtract: emits vh<plusminus_mnemonic>ps.  The
;; visible RTL selects SF elements in this order: 0..3 of operand 1,
;; 0..3 of operand 2, 4..7 of operand 1, then 4..7 of operand 2.
;; NOTE(review): the lines that combine the selected elements are not
;; visible in this excerpt.
1248 (define_insn "avx_h<plusminus_insn>v8sf3"
1249 [(set (match_operand:V8SF 0 "register_operand" "=x")
1255 (match_operand:V8SF 1 "register_operand" "x")
1256 (parallel [(const_int 0)]))
1257 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1259 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1260 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1264 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1265 (parallel [(const_int 0)]))
1266 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1268 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1269 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1273 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1274 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1276 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1277 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1280 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1281 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1283 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1284 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1286 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1287 [(set_attr "type" "sseadd")
1288 (set_attr "prefix" "vex")
1289 (set_attr "mode" "V8SF")])
;; V4SF horizontal add/subtract, AVX encoding: emits the three-operand
;; vh<plusminus_mnemonic>ps.  The visible RTL selects SF elements 0..3
;; of operand 1 followed by elements 0..3 of operand 2.
1291 (define_insn "*avx_h<plusminus_insn>v4sf3"
1292 [(set (match_operand:V4SF 0 "register_operand" "=x")
1297 (match_operand:V4SF 1 "register_operand" "x")
1298 (parallel [(const_int 0)]))
1299 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1301 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1302 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1306 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1307 (parallel [(const_int 0)]))
1308 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1310 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1311 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1313 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1314 [(set_attr "type" "sseadd")
1315 (set_attr "prefix" "vex")
1316 (set_attr "mode" "V4SF")])
;; V4SF horizontal add/subtract, legacy encoding: emits the two-operand
;; h<plusminus_mnemonic>ps with operand 1 tied to the destination ("0").
;; Sets atom_unit to "complex" and prefix_rep to 1.
1318 (define_insn "sse3_h<plusminus_insn>v4sf3"
1319 [(set (match_operand:V4SF 0 "register_operand" "=x")
1324 (match_operand:V4SF 1 "register_operand" "0")
1325 (parallel [(const_int 0)]))
1326 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1328 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1329 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1333 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1334 (parallel [(const_int 0)]))
1335 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1337 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1338 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1340 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1341 [(set_attr "type" "sseadd")
1342 (set_attr "atom_unit" "complex")
1343 (set_attr "prefix_rep" "1")
1344 (set_attr "mode" "V4SF")])
;; V2DF horizontal add/subtract, AVX encoding: emits the three-operand
;; vh<plusminus_mnemonic>pd.  The visible RTL selects DF elements 0 and 1
;; of operand 1 followed by elements 0 and 1 of operand 2.
1346 (define_insn "*avx_h<plusminus_insn>v2df3"
1347 [(set (match_operand:V2DF 0 "register_operand" "=x")
1351 (match_operand:V2DF 1 "register_operand" "x")
1352 (parallel [(const_int 0)]))
1353 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1356 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1357 (parallel [(const_int 0)]))
1358 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1360 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1361 [(set_attr "type" "sseadd")
1362 (set_attr "prefix" "vex")
1363 (set_attr "mode" "V2DF")])
;; V2DF horizontal add/subtract, legacy encoding: emits the two-operand
;; h<plusminus_mnemonic>pd with operand 1 tied to the destination ("0").
1365 (define_insn "sse3_h<plusminus_insn>v2df3"
1366 [(set (match_operand:V2DF 0 "register_operand" "=x")
1370 (match_operand:V2DF 1 "register_operand" "0")
1371 (parallel [(const_int 0)]))
1372 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1375 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1376 (parallel [(const_int 0)]))
1377 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1379 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1380 [(set_attr "type" "sseadd")
1381 (set_attr "mode" "V2DF")])
;; Expander taking a V4SF destination (operand 0) and a V4SF source
;; (operand 1).  Two strategies are visible:
;;  - allocate a temporary and apply gen_sse3_haddv4sf3 twice, first on
;;    (operand 1, operand 1) and then on (tmp, tmp) into operand 0;
;;  - call ix86_expand_reduc_v4sf with gen_addv4sf3.
;; NOTE(review): the condition selecting between them is not visible in
;; this excerpt.
1383 (define_expand "reduc_splus_v4sf"
1384 [(match_operand:V4SF 0 "register_operand" "")
1385 (match_operand:V4SF 1 "register_operand" "")]
1390 rtx tmp = gen_reg_rtx (V4SFmode);
1391 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1392 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1395 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Expander taking a V2DF destination and source; emits one
;; gen_sse3_haddv2df3 with operand 1 as both inputs.
1399 (define_expand "reduc_splus_v2df"
1400 [(match_operand:V2DF 0 "register_operand" "")
1401 (match_operand:V2DF 1 "register_operand" "")]
1404 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Expander taking a V4SF destination and source; delegates to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining generator.
1408 (define_expand "reduc_smax_v4sf"
1409 [(match_operand:V4SF 0 "register_operand" "")
1410 (match_operand:V4SF 1 "register_operand" "")]
1413 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Expander taking a V4SF destination and source; delegates to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining generator.
1417 (define_expand "reduc_smin_v4sf"
1418 [(match_operand:V4SF 0 "register_operand" "")
1419 (match_operand:V4SF 1 "register_operand" "")]
1422 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1426 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1428 ;; Parallel floating point comparisons
1430 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed compare with an explicit predicate: emits
;; vcmp<ssemodesuffix> with four assembler operands.  Operand 3 is an SI
;; constant accepted by const_0_to_31_operand and accounts for the one
;; immediate byte declared by length_immediate.
1432 (define_insn "avx_cmp<ssemodesuffix><mode>3"
1433 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1435 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1436 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1437 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1440 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1441 [(set_attr "type" "ssecmp")
1442 (set_attr "length_immediate" "1")
1443 (set_attr "prefix" "vex")
1444 (set_attr "mode" "<MODE>")])
;; AVX scalar compare with an explicit predicate: emits
;; vcmp<ssescalarmodesuffix> with four assembler operands.  Operand 3 is
;; an SI constant accepted by const_0_to_31_operand and accounts for the
;; one immediate byte declared by length_immediate.
;;
;; Operand 0 carries the output constraint "=x".  A define_insn needs a
;; real constraint on its destination so that register allocation places
;; it in an SSE register and treats it as written; the previous empty
;; constraint string did neither.  Every sibling insn in this file uses
;; "=x" for operand 0.
1446 (define_insn "avx_cmp<ssescalarmodesuffix><mode>3"
1447 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1448 (vec_merge:SSEMODEF2P
1450 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1451 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1452 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1457 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1458 [(set_attr "type" "ssecmp")
1459 (set_attr "length_immediate" "1")
1460 (set_attr "prefix" "vex")
1461 (set_attr "mode" "<ssescalarmode>")])
1463 ;; We don't promote 128bit vector compare intrinsics. But vectorizer
1464 ;; may generate 256bit vector compare instructions.
;; AVX packed compare whose predicate comes from the comparison operator
;; itself (match_operator 3, avx_comparison_float_operator) and is
;; printed through the %D3 output modifier.  Three-operand VEX form;
;; length_immediate is 1.
1465 (define_insn "*avx_maskcmp<mode>3"
1466 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1467 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1468 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1469 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1470 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1471 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1472 [(set_attr "type" "ssecmp")
1473 (set_attr "prefix" "vex")
1474 (set_attr "length_immediate" "1")
1475 (set_attr "mode" "<avxvecmode>")])
;; SSE compare over the SSEMODEF4 modes; the visible part of the
;; condition accepts either SSE_FLOAT_MODE_P or SSE_VEC_FLOAT_MODE_P.
;; The predicate comes from match_operator 3 (sse_comparison_operator)
;; and is printed through %D3.  Two-operand form with operand 1 tied to
;; the destination ("0").
;; NOTE(review): the first line of the condition is not visible in this
;; excerpt.
1477 (define_insn "<sse>_maskcmp<mode>3"
1478 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1479 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1480 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1481 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1483 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1484 "cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}"
1485 [(set_attr "type" "ssecmp")
1486 (set_attr "length_immediate" "1")
1487 (set_attr "mode" "<MODE>")])
;; AVX scalar (vec_merge) compare whose predicate comes from
;; match_operator 3 (sse_comparison_operator) and is printed through
;; %D3.  Three-operand VEX form.
;;
;; length_immediate is set to 1 because the compare predicate is encoded
;; as an immediate byte.  The packed AVX pattern *avx_maskcmp<mode>3 and
;; the SSE scalar pattern <sse>_vmmaskcmp<mode>3 already declare it;
;; without it this insn's computed length would be one byte short.
1489 (define_insn "*avx_vmmaskcmp<mode>3"
1490 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1491 (vec_merge:SSEMODEF2P
1492 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1493 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1494 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1497 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1498 "vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1499 [(set_attr "type" "ssecmp")
1500 (set_attr "prefix" "vex")
(set_attr "length_immediate" "1")
1501 (set_attr "mode" "<ssescalarmode>")])
;; SSE scalar (vec_merge) compare whose predicate comes from
;; match_operator 3 (sse_comparison_operator) and is printed through
;; %D3.  Two-operand form with operand 1 tied to the destination ("0");
;; length_immediate is 1.
1503 (define_insn "<sse>_vmmaskcmp<mode>3"
1504 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1505 (vec_merge:SSEMODEF2P
1506 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1507 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1508 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1511 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1512 "cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1513 [(set_attr "type" "ssecmp")
1514 (set_attr "length_immediate" "1")
1515 (set_attr "mode" "<ssescalarmode>")])
;; Sets FLAGS_REG in CCFP mode from element 0 (parallel [(const_int 0)])
;; of operand 0 and of operand 1.  Emits comis<ssemodefsuffix> through
;; the "%v" template prefix (prefix attribute maybe_vex).  prefix_rep is
;; forced to 0, and prefix_data16 is chosen by whether the mode is DF.
;; NOTE(review): the DF-case value of prefix_data16 is not visible in
;; this excerpt.
1517 (define_insn "<sse>_comi"
1518 [(set (reg:CCFP FLAGS_REG)
1521 (match_operand:<ssevecmode> 0 "register_operand" "x")
1522 (parallel [(const_int 0)]))
1524 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1525 (parallel [(const_int 0)]))))]
1526 "SSE_FLOAT_MODE_P (<MODE>mode)"
1527 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1528 [(set_attr "type" "ssecomi")
1529 (set_attr "prefix" "maybe_vex")
1530 (set_attr "prefix_rep" "0")
1531 (set (attr "prefix_data16")
1532 (if_then_else (eq_attr "mode" "DF")
1534 (const_string "0")))
1535 (set_attr "mode" "<MODE>")])
;; Sets FLAGS_REG in CCFPU mode from element 0 (parallel [(const_int 0)])
;; of operand 0 and of operand 1.  Emits ucomis<ssemodefsuffix> through
;; the "%v" template prefix (prefix attribute maybe_vex).  prefix_rep is
;; forced to 0, and prefix_data16 is chosen by whether the mode is DF.
;; NOTE(review): the DF-case value of prefix_data16 is not visible in
;; this excerpt.
1537 (define_insn "<sse>_ucomi"
1538 [(set (reg:CCFPU FLAGS_REG)
1541 (match_operand:<ssevecmode> 0 "register_operand" "x")
1542 (parallel [(const_int 0)]))
1544 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1545 (parallel [(const_int 0)]))))]
1546 "SSE_FLOAT_MODE_P (<MODE>mode)"
1547 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1548 [(set_attr "type" "ssecomi")
1549 (set_attr "prefix" "maybe_vex")
1550 (set_attr "prefix_rep" "0")
1551 (set (attr "prefix_data16")
1552 (if_then_else (eq_attr "mode" "DF")
1554 (const_string "0")))
1555 (set_attr "mode" "<MODE>")])
;; Vector conditional-select expander over AVXMODEF2P.  Operand 3 is the
;; comparison (match_operator with an empty predicate, so any operator
;; is accepted) applied to operands 4 and 5; operands 1 and 2 are the two
;; if_then_else arms.  Enabled for SSE_VEC_FLOAT_MODE_P or
;; AVX_VEC_FLOAT_MODE_P.  Expansion is delegated to
;; ix86_expand_fp_vcond, whose boolean result is stored in "ok".
;; NOTE(review): how "ok" is consumed is not visible in this excerpt.
1557 (define_expand "vcond<mode>"
1558 [(set (match_operand:AVXMODEF2P 0 "register_operand" "")
1559 (if_then_else:AVXMODEF2P
1560 (match_operator 3 ""
1561 [(match_operand:AVXMODEF2P 4 "nonimmediate_operand" "")
1562 (match_operand:AVXMODEF2P 5 "nonimmediate_operand" "")])
1563 (match_operand:AVXMODEF2P 1 "general_operand" "")
1564 (match_operand:AVXMODEF2P 2 "general_operand" "")))]
1565 "(SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1566 || AVX_VEC_FLOAT_MODE_P (<MODE>mode))"
1568 bool ok = ix86_expand_fp_vcond (operands);
1573 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1575 ;; Parallel floating point logical operations
1577 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX vector and-not: emits the three-operand vandn<ssemodesuffix>.
;; Operand 1 must be a register; operand 2 may be a register or memory.
1579 (define_insn "avx_andnot<mode>3"
1580 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1583 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1584 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1585 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1586 "vandn<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1587 [(set_attr "type" "sselog")
1588 (set_attr "prefix" "vex")
1589 (set_attr "mode" "<avxvecmode>")])
;; SSE vector and-not: emits the two-operand andn<ssemodesuffix> with
;; operand 1 tied to the destination ("0").
1591 (define_insn "<sse>_andnot<mode>3"
1592 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1595 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1596 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1597 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1598 "andn<ssemodesuffix>\t{%2, %0|%0, %2}"
1599 [(set_attr "type" "sselog")
1600 (set_attr "mode" "<MODE>")])
;; Expander for the any_logic codes on the AVX256MODEF2P modes; the only
;; preparation step is ix86_fixup_binary_operands_no_copy.
1602 (define_expand "<code><mode>3"
1603 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1604 (any_logic:AVX256MODEF2P
1605 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1606 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1607 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1608 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX vector logic instruction for the any_logic codes; operand 1 is
;; marked commutative ("%x").  When
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set the "ps" mnemonic is
;; returned regardless of mode; otherwise the mode's own suffix is used.
;; NOTE(review): the braces and any else keyword around the two return
;; statements are not visible in this excerpt.
1610 (define_insn "*avx_<code><mode>3"
1611 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1612 (any_logic:AVXMODEF2P
1613 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1614 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1615 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1616 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1618 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1619 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1621 return "v<logic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1623 [(set_attr "type" "sselog")
1624 (set_attr "prefix" "vex")
1625 (set_attr "mode" "<avxvecmode>")])
;; Expander for the any_logic codes on the SSEMODEF2P modes; the only
;; preparation step is ix86_fixup_binary_operands_no_copy.
1627 (define_expand "<code><mode>3"
1628 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1629 (any_logic:SSEMODEF2P
1630 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1631 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1632 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1633 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; SSE vector logic instruction for the any_logic codes; operand 1 is
;; tied to the destination and marked commutative ("%0").  When
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set the "ps" mnemonic is
;; returned regardless of mode; otherwise the mode's own suffix is used.
1635 (define_insn "*<code><mode>3"
1636 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1637 (any_logic:SSEMODEF2P
1638 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1639 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1640 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1641 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1643 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1644 return "<logic>ps\t{%2, %0|%0, %2}";
1646 return "<logic><ssemodesuffix>\t{%2, %0|%0, %2}";
1648 [(set_attr "type" "sselog")
1649 (set_attr "mode" "<MODE>")])
;; Vector copysign expander.  Operand 3 is a mask built by
;; ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0); operands 4 and 5
;; are fresh temporaries.  The visible RTL combines (not mask) with
;; operand 1, (and mask) with operand 2, and finally stores the ior of
;; temporaries 4 and 5 into operand 0.
;; NOTE(review): the set/and lines that define temporaries 4 and 5 are
;; not visible in this excerpt.
1651 (define_expand "copysign<mode>3"
1654 (not:SSEMODEF2P (match_dup 3))
1655 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
1657 (and:SSEMODEF2P (match_dup 3)
1658 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
1659 (set (match_operand:SSEMODEF2P 0 "register_operand" "")
1660 (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
1661 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1663 operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
1665 operands[4] = gen_reg_rtx (<MODE>mode);
1666 operands[5] = gen_reg_rtx (<MODE>mode);
1669 ;; Also define scalar versions. These are used for abs, neg, and
1670 ;; conditional move. Using subregs into vector modes causes register
1671 ;; allocation lossage. These patterns do not allow memory operands
1672 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not, AVX encoding: all operands are registers and
;; the packed vandnp<ssemodefsuffix> instruction is emitted.  The mode
;; attribute is the corresponding vector mode (<ssevecmode>).
1674 (define_insn "*avx_andnot<mode>3"
1675 [(set (match_operand:MODEF 0 "register_operand" "=x")
1678 (match_operand:MODEF 1 "register_operand" "x"))
1679 (match_operand:MODEF 2 "register_operand" "x")))]
1680 "AVX_FLOAT_MODE_P (<MODE>mode)"
1681 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1682 [(set_attr "type" "sselog")
1683 (set_attr "prefix" "vex")
1684 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) and-not, legacy encoding: all operands are registers,
;; operand 1 is tied to the destination ("0"), and the packed
;; andnp<ssemodefsuffix> instruction is emitted.
1686 (define_insn "*andnot<mode>3"
1687 [(set (match_operand:MODEF 0 "register_operand" "=x")
1690 (match_operand:MODEF 1 "register_operand" "0"))
1691 (match_operand:MODEF 2 "register_operand" "x")))]
1692 "SSE_FLOAT_MODE_P (<MODE>mode)"
1693 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1694 [(set_attr "type" "sselog")
1695 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logic operation, AVX encoding: all operands are
;; registers.  Returns the "ps" mnemonic when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, otherwise the packed
;; form selected by <ssemodefsuffix>.
;; NOTE(review): the RTL code line is not visible in this excerpt.
1697 (define_insn "*avx_<code><mode>3"
1698 [(set (match_operand:MODEF 0 "register_operand" "=x")
1700 (match_operand:MODEF 1 "register_operand" "x")
1701 (match_operand:MODEF 2 "register_operand" "x")))]
1702 "AVX_FLOAT_MODE_P (<MODE>mode)"
1704 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1705 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1707 return "v<logic>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}";
1709 [(set_attr "type" "sselog")
1710 (set_attr "prefix" "vex")
1711 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logic operation, legacy encoding: all operands are
;; registers and operand 1 is tied to the destination ("0").  Returns the
;; "ps" mnemonic when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set,
;; otherwise the packed form selected by <ssemodefsuffix>.
;; NOTE(review): the RTL code line is not visible in this excerpt.
1713 (define_insn "*<code><mode>3"
1714 [(set (match_operand:MODEF 0 "register_operand" "=x")
1716 (match_operand:MODEF 1 "register_operand" "0")
1717 (match_operand:MODEF 2 "register_operand" "x")))]
1718 "SSE_FLOAT_MODE_P (<MODE>mode)"
1720 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1721 return "<logic>ps\t{%2, %0|%0, %2}";
1723 return "<logic>p<ssemodefsuffix>\t{%2, %0|%0, %2}";
1725 [(set_attr "type" "sselog")
1726 (set_attr "mode" "<ssevecmode>")])
1728 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1730 ;; FMA4 floating point multiply/accumulate instructions. This
1731 ;; includes the scalar version of the instructions as well as the vector versions.
1734 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1736 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1737 ;; combine to generate a multiply/add with two memory references. We then
1738 ;; split this insn, into loading up the destination register with one of the
1739 ;; memory operations. If we don't manage to split the insn, reload will
1740 ;; generate the appropriate moves. The reason this is needed, is that combine
1741 ;; has already folded one of the memory references into both the multiply and
1742 ;; add insns, and it can't generate a new pseudo. I.e.:
1743 ;; (set (reg1) (mem (addr1)))
1744 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1745 ;; (set (reg3) (plus (reg2) (mem (addr3))))
;; FMA4 multiply-add over the FMA4MODEF4 modes: emits the four-operand
;; vfmadd<ssemodesuffix>.  Two constraint alternatives are offered:
;; operand 3 may be memory when operand 2 is a register, or operand 2 is
;; memory and operand 3 a register.  Operand 1 is marked commutative.
1747 (define_insn "fma4_fmadd<mode>4256"
1748 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1751 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1752 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1753 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1754 "TARGET_FMA4 && TARGET_FUSED_MADD"
1755 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1756 [(set_attr "type" "ssemuladd")
1757 (set_attr "mode" "<MODE>")])
1759 ;; Floating multiply and subtract.
;; FMA4 multiply-subtract over the FMA4MODEF4 modes: emits the
;; four-operand vfmsub<ssemodesuffix>, with the same two constraint
;; alternatives as the multiply-add pattern.
1760 (define_insn "fma4_fmsub<mode>4256"
1761 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1764 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1765 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1766 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1767 "TARGET_FMA4 && TARGET_FUSED_MADD"
1768 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1769 [(set_attr "type" "ssemuladd")
1770 (set_attr "mode" "<MODE>")])
1772 ;; Floating point negative multiply and add.
1773 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; Emits the four-operand vfnmadd<ssemodesuffix>.  In the RTL, operand 3
;; is listed before the operand 1 / operand 2 pair, but the assembler
;; template still prints the operands in numeric order.
1774 (define_insn "fma4_fnmadd<mode>4256"
1775 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1777 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1779 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1780 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))))]
1781 "TARGET_FMA4 && TARGET_FUSED_MADD"
1782 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1783 [(set_attr "type" "ssemuladd")
1784 (set_attr "mode" "<MODE>")])
1786 ;; Floating point negative multiply and subtract.
;; Emits the four-operand vfnmsub<ssemodesuffix>.  Operand 1 is nested
;; one level deeper than operand 2 in the visible RTL.
;; NOTE(review): the wrapping line is not visible in this excerpt;
;; presumably a neg of operand 1 -- confirm.
1787 (define_insn "fma4_fnmsub<mode>4256"
1788 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1792 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1793 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1794 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1795 "TARGET_FMA4 && TARGET_FUSED_MADD"
1796 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1797 [(set_attr "type" "ssemuladd")
1798 (set_attr "mode" "<MODE>")])
;; FMA4 multiply-add over the SSEMODEF4 modes: emits the four-operand
;; vfmadd<ssemodesuffix>.  Two constraint alternatives are offered:
;; operand 3 may be memory when operand 2 is a register, or operand 2 is
;; memory and operand 3 a register.
1800 (define_insn "fma4_fmadd<mode>4"
1801 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1804 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1805 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1806 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1807 "TARGET_FMA4 && TARGET_FUSED_MADD"
1808 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1809 [(set_attr "type" "ssemuladd")
1810 (set_attr "mode" "<MODE>")])
1812 ;; For the scalar operations, use operand1 for the upper words that aren't
1813 ;; modified, so restrict the forms that are generated.
1814 ;; Scalar version of fmadd.
;; vec_merge form over SSEMODEF2P; emits vfmadd<ssescalarmodesuffix>
;; with four assembler operands.
;; NOTE(review): the merge source and mask lines are not visible in this
;; excerpt.
1815 (define_insn "fma4_vmfmadd<mode>4"
1816 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1817 (vec_merge:SSEMODEF2P
1820 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1821 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1822 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1825 "TARGET_FMA4 && TARGET_FUSED_MADD"
1826 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1827 [(set_attr "type" "ssemuladd")
1828 (set_attr "mode" "<MODE>")])
1830 ;; Floating multiply and subtract.
1831 ;; Allow two memory operands the same as fmadd.
;; FMA4 multiply-subtract over the SSEMODEF4 modes: emits the
;; four-operand vfmsub<ssemodesuffix>; constraint alternatives match the
;; multiply-add pattern ("x,m" for operand 2, "xm,x" for operand 3).
1832 (define_insn "fma4_fmsub<mode>4"
1833 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1836 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1837 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1838 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1839 "TARGET_FMA4 && TARGET_FUSED_MADD"
1840 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1841 [(set_attr "type" "ssemuladd")
1842 (set_attr "mode" "<MODE>")])
1844 ;; For the scalar operations, use operand1 for the upper words that aren't
1845 ;; modified, so restrict the forms that are generated.
1846 ;; Scalar version of fmsub.
;; vec_merge form over SSEMODEF2P; emits vfmsub<ssescalarmodesuffix>
;; with four assembler operands.
;; NOTE(review): the merge source and mask lines are not visible in this
;; excerpt.
1847 (define_insn "fma4_vmfmsub<mode>4"
1848 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1849 (vec_merge:SSEMODEF2P
1852 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1853 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1854 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1857 "TARGET_FMA4 && TARGET_FUSED_MADD"
1858 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1859 [(set_attr "type" "ssemuladd")
1860 (set_attr "mode" "<MODE>")])
1862 ;; Floating point negative multiply and add.
1863 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; Emits the four-operand vfnmadd<ssemodesuffix> over the SSEMODEF4
;; modes.  In the RTL, operand 3 is listed before the operand 1 /
;; operand 2 pair, but the assembler template prints the operands in
;; numeric order.
1864 (define_insn "fma4_fnmadd<mode>4"
1865 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1867 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")
1869 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1870 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))))]
1871 "TARGET_FMA4 && TARGET_FUSED_MADD"
1872 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1873 [(set_attr "type" "ssemuladd")
1874 (set_attr "mode" "<MODE>")])
1876 ;; For the scalar operations, use operand1 for the upper words that aren't
1877 ;; modified, so restrict the forms that are generated.
1878 ;; Scalar version of fnmadd.
;; vec_merge form over SSEMODEF2P; emits vfnmadd<ssescalarmodesuffix>
;; with four assembler operands.  Operand 3 is listed first in the RTL.
;; NOTE(review): the merge source and mask lines are not visible in this
;; excerpt.
1879 (define_insn "fma4_vmfnmadd<mode>4"
1880 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1881 (vec_merge:SSEMODEF2P
1883 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1885 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1886 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
1889 "TARGET_FMA4 && TARGET_FUSED_MADD"
1890 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1891 [(set_attr "type" "ssemuladd")
1892 (set_attr "mode" "<MODE>")])
1894 ;; Floating point negative multiply and subtract.
1895 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c.
;; Emits the four-operand vfnmsub<ssemodesuffix> over the SSEMODEF4
;; modes.  Operand 1 is nested one level deeper than operand 2 in the
;; visible RTL.
;; NOTE(review): the wrapping line is not visible in this excerpt;
;; presumably a neg of operand 1 -- confirm.
1896 (define_insn "fma4_fnmsub<mode>4"
1897 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1901 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x"))
1902 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1903 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1904 "TARGET_FMA4 && TARGET_FUSED_MADD"
1905 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1906 [(set_attr "type" "ssemuladd")
1907 (set_attr "mode" "<MODE>")])
1909 ;; For the scalar operations, use operand1 for the upper words that aren't
1910 ;; modified, so restrict the forms that are generated.
1911 ;; Scalar version of fnmsub.
;; vec_merge form over SSEMODEF2P; emits vfnmsub<ssescalarmodesuffix>
;; with four assembler operands.
;; NOTE(review): the merge source and mask lines are not visible in this
;; excerpt.
1912 (define_insn "fma4_vmfnmsub<mode>4"
1913 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1914 (vec_merge:SSEMODEF2P
1918 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1919 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1920 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1923 "TARGET_FMA4 && TARGET_FUSED_MADD"
1924 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1925 [(set_attr "type" "ssemuladd")
1926 (set_attr "mode" "<MODE>")])
;; Intrinsic ("fma4i_") form of vfmadd for the FMA4MODEF4 modes.  The
;; source expression is wrapped in UNSPEC_FMA4_INTRINSIC.  Operand 2 may be
;; memory in alternative 1, operand 3 in alternative 0.
1928 (define_insn "fma4i_fmadd<mode>4256"
1929 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1933 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1934 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1935 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1936 UNSPEC_FMA4_INTRINSIC))]
1938 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1939 [(set_attr "type" "ssemuladd")
1940 (set_attr "mode" "<MODE>")])
;; Intrinsic form of vfmsub for the FMA4MODEF4 modes, expressed through
;; UNSPEC_FMA4_INTRINSIC.  Same operand/constraint layout as the other
;; four-operand FMA4 patterns: one memory source at most per alternative.
1942 (define_insn "fma4i_fmsub<mode>4256"
1943 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1947 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1948 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1949 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1950 UNSPEC_FMA4_INTRINSIC))]
1952 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1953 [(set_attr "type" "ssemuladd")
1954 (set_attr "mode" "<MODE>")])
;; Intrinsic form of vfnmadd for the FMA4MODEF4 modes.  Note that operand 3
;; appears first in the RTL, ahead of operands 1 and 2; the whole
;; expression is wrapped in UNSPEC_FMA4_INTRINSIC.
1956 (define_insn "fma4i_fnmadd<mode>4256"
1957 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1960 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1962 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1963 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")))]
1964 UNSPEC_FMA4_INTRINSIC))]
1966 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1967 [(set_attr "type" "ssemuladd")
1968 (set_attr "mode" "<MODE>")])
;; Intrinsic form of vfnmsub for the FMA4MODEF4 modes, wrapped in
;; UNSPEC_FMA4_INTRINSIC.  Destination is a register in both alternatives;
;; operand 2 or operand 3 (not both) may come from memory.
1970 (define_insn "fma4i_fnmsub<mode>4256"
1971 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1976 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1977 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1978 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1979 UNSPEC_FMA4_INTRINSIC))]
1981 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1982 [(set_attr "type" "ssemuladd")
1983 (set_attr "mode" "<MODE>")])
;; Intrinsic form of vfmadd for the SSEMODEF2P modes (the 4256 sibling
;; covers FMA4MODEF4).  Expression is wrapped in UNSPEC_FMA4_INTRINSIC;
;; the packed suffix <ssemodesuffix> is used in the mnemonic.
1985 (define_insn "fma4i_fmadd<mode>4"
1986 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1990 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1991 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1992 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1993 UNSPEC_FMA4_INTRINSIC))]
1995 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1996 [(set_attr "type" "ssemuladd")
1997 (set_attr "mode" "<MODE>")])
;; Intrinsic form of vfmsub for the SSEMODEF2P modes, wrapped in
;; UNSPEC_FMA4_INTRINSIC.  Alternative 0: operand 3 may be memory;
;; alternative 1: operand 2 may be memory.
1999 (define_insn "fma4i_fmsub<mode>4"
2000 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2004 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2005 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2006 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2007 UNSPEC_FMA4_INTRINSIC))]
2009 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2010 [(set_attr "type" "ssemuladd")
2011 (set_attr "mode" "<MODE>")])
;; Intrinsic form of vfnmadd for the SSEMODEF2P modes.  Operand 3 is
;; listed ahead of operands 1 and 2 in the RTL; the expression is wrapped
;; in UNSPEC_FMA4_INTRINSIC.
2013 (define_insn "fma4i_fnmadd<mode>4"
2014 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2017 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2019 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2020 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))]
2021 UNSPEC_FMA4_INTRINSIC))]
2023 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2024 [(set_attr "type" "ssemuladd")
2025 (set_attr "mode" "<MODE>")])
;; Intrinsic form of vfnmsub for the SSEMODEF2P modes, wrapped in
;; UNSPEC_FMA4_INTRINSIC.  Typed "ssemuladd" with the vector mode as its
;; "mode" attribute.
2027 (define_insn "fma4i_fnmsub<mode>4"
2028 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2033 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2034 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2035 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2036 UNSPEC_FMA4_INTRINSIC))]
2038 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2039 [(set_attr "type" "ssemuladd")
2040 (set_attr "mode" "<MODE>")])
2042 ;; For the scalar operations, use operand1 for the upper words that aren't
2043 ;; modified, so restrict the forms that are accepted.
;; Scalar intrinsic vfmadd: a vec_merge inside UNSPEC_FMA4_INTRINSIC,
;; emitted with the scalar mnemonic suffix.  Unlike the packed intrinsic
;; patterns, the "mode" attribute here is the scalar mode <ssescalarmode>.
2044 (define_insn "fma4i_vmfmadd<mode>4"
2045 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2047 [(vec_merge:SSEMODEF2P
2050 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2051 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2052 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2055 UNSPEC_FMA4_INTRINSIC))]
2057 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2058 [(set_attr "type" "ssemuladd")
2059 (set_attr "mode" "<ssescalarmode>")])
;; Scalar intrinsic vfmsub: vec_merge wrapped in UNSPEC_FMA4_INTRINSIC,
;; using the scalar mnemonic suffix and the scalar "mode" attribute.
2061 (define_insn "fma4i_vmfmsub<mode>4"
2062 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2064 [(vec_merge:SSEMODEF2P
2067 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2068 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2069 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2072 UNSPEC_FMA4_INTRINSIC))]
2074 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2075 [(set_attr "type" "ssemuladd")
2076 (set_attr "mode" "<ssescalarmode>")])
;; Scalar intrinsic vfnmadd: vec_merge wrapped in UNSPEC_FMA4_INTRINSIC.
;; Operand 3 precedes operands 1 and 2 in the RTL; the assembler operand
;; order in the template is unchanged.
2078 (define_insn "fma4i_vmfnmadd<mode>4"
2079 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2081 [(vec_merge:SSEMODEF2P
2083 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2085 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2086 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
2089 UNSPEC_FMA4_INTRINSIC))]
2091 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2092 [(set_attr "type" "ssemuladd")
2093 (set_attr "mode" "<ssescalarmode>")])
;; Scalar intrinsic vfnmsub: vec_merge wrapped in UNSPEC_FMA4_INTRINSIC,
;; emitted with the scalar mnemonic suffix; "mode" is <ssescalarmode>.
2095 (define_insn "fma4i_vmfnmsub<mode>4"
2096 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2098 [(vec_merge:SSEMODEF2P
2102 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2103 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2104 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2107 UNSPEC_FMA4_INTRINSIC))]
2109 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2110 [(set_attr "type" "ssemuladd")
2111 (set_attr "mode" "<ssescalarmode>")])
2113 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2115 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
2117 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V8SF vfmaddsubps, available under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Operands 1-3 are V8SF sources; operand 2 (alt 1) or operand 3 (alt 0)
;; may be a memory operand.
2119 (define_insn "fma4_fmaddsubv8sf4"
2120 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2124 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2125 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2126 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2133 "TARGET_FMA4 && TARGET_FUSED_MADD"
2134 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2135 [(set_attr "type" "ssemuladd")
2136 (set_attr "mode" "V8SF")])
;; V4DF vfmaddsubpd, available under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Same operand and constraint layout as the V8SF variant.
2138 (define_insn "fma4_fmaddsubv4df4"
2139 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2143 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2144 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2145 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2152 "TARGET_FMA4 && TARGET_FUSED_MADD"
2153 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2154 [(set_attr "type" "ssemuladd")
2155 (set_attr "mode" "V4DF")])
;; V4SF vfmaddsubps, available under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Shares the mnemonic with the V8SF pattern; only the operand mode differs.
2157 (define_insn "fma4_fmaddsubv4sf4"
2158 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2162 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2163 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2164 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2171 "TARGET_FMA4 && TARGET_FUSED_MADD"
2172 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2173 [(set_attr "type" "ssemuladd")
2174 (set_attr "mode" "V4SF")])
;; V2DF vfmaddsubpd, available under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Shares the mnemonic with the V4DF pattern; only the operand mode differs.
2176 (define_insn "fma4_fmaddsubv2df4"
2177 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2181 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2182 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2183 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2190 "TARGET_FMA4 && TARGET_FUSED_MADD"
2191 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2192 [(set_attr "type" "ssemuladd")
2193 (set_attr "mode" "V2DF")])
;; V8SF vfmsubaddps, available under TARGET_FMA4 && TARGET_FUSED_MADD.
;; Counterpart of fma4_fmaddsubv8sf4 with the subadd mnemonic.
2195 (define_insn "fma4_fmsubaddv8sf4"
2196 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2200 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2201 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2202 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2209 "TARGET_FMA4 && TARGET_FUSED_MADD"
2210 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2211 [(set_attr "type" "ssemuladd")
2212 (set_attr "mode" "V8SF")])
;; V4DF vfmsubaddpd, available under TARGET_FMA4 && TARGET_FUSED_MADD.
2214 (define_insn "fma4_fmsubaddv4df4"
2215 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2219 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2220 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2221 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2228 "TARGET_FMA4 && TARGET_FUSED_MADD"
2229 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2230 [(set_attr "type" "ssemuladd")
2231 (set_attr "mode" "V4DF")])
;; V4SF vfmsubaddps, available under TARGET_FMA4 && TARGET_FUSED_MADD.
2233 (define_insn "fma4_fmsubaddv4sf4"
2234 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2238 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2239 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2240 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2247 "TARGET_FMA4 && TARGET_FUSED_MADD"
2248 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2249 [(set_attr "type" "ssemuladd")
2250 (set_attr "mode" "V4SF")])
;; V2DF vfmsubaddpd, available under TARGET_FMA4 && TARGET_FUSED_MADD.
2252 (define_insn "fma4_fmsubaddv2df4"
2253 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2257 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2258 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2259 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2266 "TARGET_FMA4 && TARGET_FUSED_MADD"
2267 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2268 [(set_attr "type" "ssemuladd")
2269 (set_attr "mode" "V2DF")])
;; Intrinsic form of V8SF vfmaddsubps: same operands and template as
;; fma4_fmaddsubv8sf4, but the source is wrapped in UNSPEC_FMA4_INTRINSIC.
2271 (define_insn "fma4i_fmaddsubv8sf4"
2272 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2277 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2278 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2279 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2286 UNSPEC_FMA4_INTRINSIC))]
2288 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2289 [(set_attr "type" "ssemuladd")
2290 (set_attr "mode" "V8SF")])
;; Intrinsic form of V4DF vfmaddsubpd, wrapped in UNSPEC_FMA4_INTRINSIC.
2292 (define_insn "fma4i_fmaddsubv4df4"
2293 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2298 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2299 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2300 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2307 UNSPEC_FMA4_INTRINSIC))]
2309 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2310 [(set_attr "type" "ssemuladd")
2311 (set_attr "mode" "V4DF")])
;; Intrinsic form of V4SF vfmaddsubps, wrapped in UNSPEC_FMA4_INTRINSIC.
2313 (define_insn "fma4i_fmaddsubv4sf4"
2314 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2319 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2320 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2321 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2328 UNSPEC_FMA4_INTRINSIC))]
2330 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2331 [(set_attr "type" "ssemuladd")
2332 (set_attr "mode" "V4SF")])
;; Intrinsic form of V2DF vfmaddsubpd, wrapped in UNSPEC_FMA4_INTRINSIC.
2334 (define_insn "fma4i_fmaddsubv2df4"
2335 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2340 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2341 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2342 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2349 UNSPEC_FMA4_INTRINSIC))]
2351 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2352 [(set_attr "type" "ssemuladd")
2353 (set_attr "mode" "V2DF")])
;; Intrinsic form of V8SF vfmsubaddps, wrapped in UNSPEC_FMA4_INTRINSIC.
2355 (define_insn "fma4i_fmsubaddv8sf4"
2356 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2361 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2362 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2363 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2370 UNSPEC_FMA4_INTRINSIC))]
2372 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2373 [(set_attr "type" "ssemuladd")
2374 (set_attr "mode" "V8SF")])
;; Intrinsic form of V4DF vfmsubaddpd, wrapped in UNSPEC_FMA4_INTRINSIC.
2376 (define_insn "fma4i_fmsubaddv4df4"
2377 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2382 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2383 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2384 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2391 UNSPEC_FMA4_INTRINSIC))]
2393 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2394 [(set_attr "type" "ssemuladd")
2395 (set_attr "mode" "V4DF")])
;; Intrinsic form of V4SF vfmsubaddps, wrapped in UNSPEC_FMA4_INTRINSIC.
2397 (define_insn "fma4i_fmsubaddv4sf4"
2398 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2403 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2404 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2405 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2412 UNSPEC_FMA4_INTRINSIC))]
2414 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2415 [(set_attr "type" "ssemuladd")
2416 (set_attr "mode" "V4SF")])
;; Intrinsic form of V2DF vfmsubaddpd, wrapped in UNSPEC_FMA4_INTRINSIC.
2418 (define_insn "fma4i_fmsubaddv2df4"
2419 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2424 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2425 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2426 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2433 UNSPEC_FMA4_INTRINSIC))]
2435 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2436 [(set_attr "type" "ssemuladd")
2437 (set_attr "mode" "V2DF")])
2439 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2441 ;; Parallel single-precision floating point conversion operations
2443 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (MMX register "y" or memory) to
;; V2SF.  Operand 1 is tied to the destination register (constraint "0"),
;; so the template only names operands 2 and 0.
2445 (define_insn "sse_cvtpi2ps"
2446 [(set (match_operand:V4SF 0 "register_operand" "=x")
2449 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2450 (match_operand:V4SF 1 "register_operand" "0")
2453 "cvtpi2ps\t{%2, %0|%0, %2}"
2454 [(set_attr "type" "ssecvt")
2455 (set_attr "mode" "V4SF")])
;; cvtps2pi: V4SF operand 1 (SSE register or memory) goes through a V4SI
;; unspec and elements 0 and 1 are selected into the V2SI MMX destination
;; ("=y").  Tagged as an MMX-unit instruction with DImode.
2457 (define_insn "sse_cvtps2pi"
2458 [(set (match_operand:V2SI 0 "register_operand" "=y")
2460 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2462 (parallel [(const_int 0) (const_int 1)])))]
2464 "cvtps2pi\t{%1, %0|%0, %1}"
2465 [(set_attr "type" "ssecvt")
2466 (set_attr "unit" "mmx")
2467 (set_attr "mode" "DI")])
;; cvttps2pi: uses the RTL "fix" conversion of V4SF operand 1 to V4SI,
;; with elements 0 and 1 selected into the V2SI MMX destination.
;; prefix_rep is explicitly set to "0" for this pattern.
2469 (define_insn "sse_cvttps2pi"
2470 [(set (match_operand:V2SI 0 "register_operand" "=y")
2472 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2473 (parallel [(const_int 0) (const_int 1)])))]
2475 "cvttps2pi\t{%1, %0|%0, %1}"
2476 [(set_attr "type" "ssecvt")
2477 (set_attr "unit" "mmx")
2478 (set_attr "prefix_rep" "0")
2479 (set_attr "mode" "SF")])
;; Three-operand VEX form of cvtsi2ss: SImode operand 2 (register or
;; memory) is converted to SF; operand 1 is an independent V4SF source
;; register (constraint "x", not tied to the destination).
2481 (define_insn "*avx_cvtsi2ss"
2482 [(set (match_operand:V4SF 0 "register_operand" "=x")
2485 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2486 (match_operand:V4SF 1 "register_operand" "x")
2489 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2490 [(set_attr "type" "sseicvt")
2491 (set_attr "prefix" "vex")
2492 (set_attr "mode" "SF")])
;; Two-operand cvtsi2ss: operand 1 is tied to the destination ("0,0").
;; The register and memory forms of operand 2 are separate alternatives so
;; that the athlon/amdfam10 decode attributes can differ between them.
2494 (define_insn "sse_cvtsi2ss"
2495 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2498 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2499 (match_operand:V4SF 1 "register_operand" "0,0")
2502 "cvtsi2ss\t{%2, %0|%0, %2}"
2503 [(set_attr "type" "sseicvt")
2504 (set_attr "athlon_decode" "vector,double")
2505 (set_attr "amdfam10_decode" "vector,double")
2506 (set_attr "mode" "SF")])
;; 64-bit-source VEX form: DImode operand 2 converted to SF via
;; vcvtsi2ssq.  Requires TARGET_AVX && TARGET_64BIT; length_vex is fixed
;; at 4.
2508 (define_insn "*avx_cvtsi2ssq"
2509 [(set (match_operand:V4SF 0 "register_operand" "=x")
2512 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2513 (match_operand:V4SF 1 "register_operand" "x")
2515 "TARGET_AVX && TARGET_64BIT"
2516 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2517 [(set_attr "type" "sseicvt")
2518 (set_attr "length_vex" "4")
2519 (set_attr "prefix" "vex")
2520 (set_attr "mode" "SF")])
;; 64-bit-source cvtsi2ssq for TARGET_SSE && TARGET_64BIT.  Operand 1 is
;; tied to the destination; a REX prefix is always accounted for
;; (prefix_rex "1").
;; NOTE(review): alternative 1 of operand 2 is "rm" where the SImode
;; sibling uses plain "m" -- confirm this asymmetry is intended.
2522 (define_insn "sse_cvtsi2ssq"
2523 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2526 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2527 (match_operand:V4SF 1 "register_operand" "0,0")
2529 "TARGET_SSE && TARGET_64BIT"
2530 "cvtsi2ssq\t{%2, %0|%0, %2}"
2531 [(set_attr "type" "sseicvt")
2532 (set_attr "prefix_rex" "1")
2533 (set_attr "athlon_decode" "vector,double")
2534 (set_attr "amdfam10_decode" "vector,double")
2535 (set_attr "mode" "SF")])
;; cvtss2si on element 0 of the V4SF operand 1, producing an SImode
;; general register.  Expressed with UNSPEC_FIX_NOTRUNC; the template uses
;; the "%v" mnemonic prefix and the insn is tagged prefix "maybe_vex".
2537 (define_insn "sse_cvtss2si"
2538 [(set (match_operand:SI 0 "register_operand" "=r,r")
2541 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2542 (parallel [(const_int 0)]))]
2543 UNSPEC_FIX_NOTRUNC))]
2545 "%vcvtss2si\t{%1, %0|%0, %1}"
2546 [(set_attr "type" "sseicvt")
2547 (set_attr "athlon_decode" "double,vector")
2548 (set_attr "prefix_rep" "1")
2549 (set_attr "prefix" "maybe_vex")
2550 (set_attr "mode" "SI")])
;; Variant of sse_cvtss2si whose source is a plain SFmode operand rather
;; than an element of a V4SF vector.  Same mnemonic and
;; UNSPEC_FIX_NOTRUNC wrapper; additionally sets amdfam10_decode.
2552 (define_insn "sse_cvtss2si_2"
2553 [(set (match_operand:SI 0 "register_operand" "=r,r")
2554 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2555 UNSPEC_FIX_NOTRUNC))]
2557 "%vcvtss2si\t{%1, %0|%0, %1}"
2558 [(set_attr "type" "sseicvt")
2559 (set_attr "athlon_decode" "double,vector")
2560 (set_attr "amdfam10_decode" "double,double")
2561 (set_attr "prefix_rep" "1")
2562 (set_attr "prefix" "maybe_vex")
2563 (set_attr "mode" "SI")])
;; DImode-result form of cvtss2si on element 0 of V4SF operand 1; needs
;; TARGET_SSE && TARGET_64BIT.  The "q" suffix is written as "{q}" in the
;; template, i.e. inside dialect braces.
2565 (define_insn "sse_cvtss2siq"
2566 [(set (match_operand:DI 0 "register_operand" "=r,r")
2569 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2570 (parallel [(const_int 0)]))]
2571 UNSPEC_FIX_NOTRUNC))]
2572 "TARGET_SSE && TARGET_64BIT"
2573 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2574 [(set_attr "type" "sseicvt")
2575 (set_attr "athlon_decode" "double,vector")
2576 (set_attr "prefix_rep" "1")
2577 (set_attr "prefix" "maybe_vex")
2578 (set_attr "mode" "DI")])
;; DImode-result cvtss2si taking a plain SFmode source (register or
;; memory) through UNSPEC_FIX_NOTRUNC; needs TARGET_SSE && TARGET_64BIT.
2580 (define_insn "sse_cvtss2siq_2"
2581 [(set (match_operand:DI 0 "register_operand" "=r,r")
2582 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2583 UNSPEC_FIX_NOTRUNC))]
2584 "TARGET_SSE && TARGET_64BIT"
2585 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2586 [(set_attr "type" "sseicvt")
2587 (set_attr "athlon_decode" "double,vector")
2588 (set_attr "amdfam10_decode" "double,double")
2589 (set_attr "prefix_rep" "1")
2590 (set_attr "prefix" "maybe_vex")
2591 (set_attr "mode" "DI")])
;; cvttss2si on element 0 of V4SF operand 1, result in an SImode general
;; register.  No unspec wrapper is visible here, in contrast to the
;; non-truncating cvtss2si patterns.
2593 (define_insn "sse_cvttss2si"
2594 [(set (match_operand:SI 0 "register_operand" "=r,r")
2597 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2598 (parallel [(const_int 0)]))))]
2600 "%vcvttss2si\t{%1, %0|%0, %1}"
2601 [(set_attr "type" "sseicvt")
2602 (set_attr "athlon_decode" "double,vector")
2603 (set_attr "amdfam10_decode" "double,double")
2604 (set_attr "prefix_rep" "1")
2605 (set_attr "prefix" "maybe_vex")
2606 (set_attr "mode" "SI")])
;; DImode-result form of cvttss2si on element 0 of V4SF operand 1; needs
;; TARGET_SSE && TARGET_64BIT.
2608 (define_insn "sse_cvttss2siq"
2609 [(set (match_operand:DI 0 "register_operand" "=r,r")
2612 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2613 (parallel [(const_int 0)]))))]
2614 "TARGET_SSE && TARGET_64BIT"
2615 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2616 [(set_attr "type" "sseicvt")
2617 (set_attr "athlon_decode" "double,vector")
2618 (set_attr "amdfam10_decode" "double,double")
2619 (set_attr "prefix_rep" "1")
2620 (set_attr "prefix" "maybe_vex")
2621 (set_attr "mode" "DI")])
;; vcvtdq2ps over the AVXMODEDCVTDQ2PS iterator: RTL "float" of operand 1,
;; whose mode is derived from the iterator via <avxcvtvecmode>.
2623 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2624 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2625 (float:AVXMODEDCVTDQ2PS
2626 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2628 "vcvtdq2ps\t{%1, %0|%0, %1}"
2629 [(set_attr "type" "ssecvt")
2630 (set_attr "prefix" "vex")
2631 (set_attr "mode" "<avxvecmode>")])
;; cvtdq2ps: RTL "float" conversion of a V4SI register-or-memory operand
;; into a V4SF register.
2633 (define_insn "sse2_cvtdq2ps"
2634 [(set (match_operand:V4SF 0 "register_operand" "=x")
2635 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2637 "cvtdq2ps\t{%1, %0|%0, %1}"
2638 [(set_attr "type" "ssecvt")
2639 (set_attr "mode" "V4SF")])
;; Expander built from four steps: a "float" conversion of V4SI operand 1,
;; an "lt" compare of operand 5 against operand 3, an "and" of operand 6
;; with operand 4, and a final "plus" of operands 5 and 7 into operand 0.
;; The preparation code sets operand 3 to a zero V4SF register, operand 4
;; to a register built from the SFmode constant 1.0 * 2**32 (real_ldexp of
;; dconst1 by 32), and operands 5..7 to fresh V4SF pseudos.
;; NOTE(review): presumably the compare/and/add sequence corrects results
;; for inputs with the top bit set -- confirm against the full template.
2641 (define_expand "sse2_cvtudq2ps"
2643 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2645 (lt:V4SF (match_dup 5) (match_dup 3)))
2647 (and:V4SF (match_dup 6) (match_dup 4)))
2648 (set (match_operand:V4SF 0 "register_operand" "")
2649 (plus:V4SF (match_dup 5) (match_dup 7)))]
2652 REAL_VALUE_TYPE TWO32r;
2656 real_ldexp (&TWO32r, &dconst1, 32);
2657 x = const_double_from_real_value (TWO32r, SFmode);
2659 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2660 operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
2662 for (i = 5; i < 8; i++)
2663 operands[i] = gen_reg_rtx (V4SFmode);
;; vcvtps2dq over the AVXMODEDCVTPS2DQ iterator.  The conversion is an
;; UNSPEC_FIX_NOTRUNC of operand 1, whose mode comes from <avxcvtvecmode>.
2666 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2667 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2668 (unspec:AVXMODEDCVTPS2DQ
2669 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2670 UNSPEC_FIX_NOTRUNC))]
2672 "vcvtps2dq\t{%1, %0|%0, %1}"
2673 [(set_attr "type" "ssecvt")
2674 (set_attr "prefix" "vex")
2675 (set_attr "mode" "<avxvecmode>")])
;; cvtps2dq: V4SF operand 1 to V4SI via UNSPEC_FIX_NOTRUNC.  Encoded with
;; a data16 prefix (prefix_data16 "1"); mode attribute is TI.
2677 (define_insn "sse2_cvtps2dq"
2678 [(set (match_operand:V4SI 0 "register_operand" "=x")
2679 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2680 UNSPEC_FIX_NOTRUNC))]
2682 "cvtps2dq\t{%1, %0|%0, %1}"
2683 [(set_attr "type" "ssecvt")
2684 (set_attr "prefix_data16" "1")
2685 (set_attr "mode" "TI")])
;; vcvttps2dq over the AVXMODEDCVTPS2DQ iterator, expressed with the RTL
;; "fix" operation instead of an unspec.
2687 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2688 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2689 (fix:AVXMODEDCVTPS2DQ
2690 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2692 "vcvttps2dq\t{%1, %0|%0, %1}"
2693 [(set_attr "type" "ssecvt")
2694 (set_attr "prefix" "vex")
2695 (set_attr "mode" "<avxvecmode>")])
;; cvttps2dq: RTL "fix" of V4SF operand 1 into a V4SI register.  Uses a
;; rep prefix (prefix_rep "1") and explicitly no data16 prefix.
2697 (define_insn "sse2_cvttps2dq"
2698 [(set (match_operand:V4SI 0 "register_operand" "=x")
2699 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2701 "cvttps2dq\t{%1, %0|%0, %1}"
2702 [(set_attr "type" "ssecvt")
2703 (set_attr "prefix_rep" "1")
2704 (set_attr "prefix_data16" "0")
2705 (set_attr "mode" "TI")])
2707 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2709 ;; Parallel double-precision floating point conversion operations
2711 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: RTL "float" of a V2SI operand into V2DF.  Alternative 0 takes
;; the source from an MMX register ("y") and is the only one tagged with
;; unit "mmx" and prefix_data16 "1"; alternative 1 takes it from memory.
2713 (define_insn "sse2_cvtpi2pd"
2714 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2715 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2717 "cvtpi2pd\t{%1, %0|%0, %1}"
2718 [(set_attr "type" "ssecvt")
2719 (set_attr "unit" "mmx,*")
2720 (set_attr "prefix_data16" "1,*")
2721 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF operand 1 to a V2SI MMX register ("=y") through
;; UNSPEC_FIX_NOTRUNC.  MMX unit, data16 prefix, DImode.
2723 (define_insn "sse2_cvtpd2pi"
2724 [(set (match_operand:V2SI 0 "register_operand" "=y")
2725 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2726 UNSPEC_FIX_NOTRUNC))]
2728 "cvtpd2pi\t{%1, %0|%0, %1}"
2729 [(set_attr "type" "ssecvt")
2730 (set_attr "unit" "mmx")
2731 (set_attr "prefix_data16" "1")
2732 (set_attr "mode" "DI")])
;; cvttpd2pi: RTL "fix" of V2DF operand 1 into a V2SI MMX register.
;; NOTE(review): the mode attribute is TI here but DI on sse2_cvtpd2pi --
;; confirm the difference is deliberate.
2734 (define_insn "sse2_cvttpd2pi"
2735 [(set (match_operand:V2SI 0 "register_operand" "=y")
2736 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2738 "cvttpd2pi\t{%1, %0|%0, %1}"
2739 [(set_attr "type" "ssecvt")
2740 (set_attr "unit" "mmx")
2741 (set_attr "prefix_data16" "1")
2742 (set_attr "mode" "TI")])
;; Three-operand VEX form of cvtsi2sd: SImode operand 2 converted to DF;
;; operand 1 is a separate V2DF source register (not tied to operand 0).
2744 (define_insn "*avx_cvtsi2sd"
2745 [(set (match_operand:V2DF 0 "register_operand" "=x")
2748 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2749 (match_operand:V2DF 1 "register_operand" "x")
2752 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2753 [(set_attr "type" "sseicvt")
2754 (set_attr "prefix" "vex")
2755 (set_attr "mode" "DF")])
;; Two-operand cvtsi2sd with operand 1 tied to the destination.  Register
;; and memory sources are separate alternatives with their own
;; athlon/amdfam10 decode classes.
2757 (define_insn "sse2_cvtsi2sd"
2758 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2761 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2762 (match_operand:V2DF 1 "register_operand" "0,0")
2765 "cvtsi2sd\t{%2, %0|%0, %2}"
2766 [(set_attr "type" "sseicvt")
2767 (set_attr "mode" "DF")
2768 (set_attr "athlon_decode" "double,direct")
2769 (set_attr "amdfam10_decode" "vector,double")])
;; VEX vcvtsi2sdq: DImode operand 2 converted to DF.  Requires
;; TARGET_AVX && TARGET_64BIT; length_vex is fixed at 4.
2771 (define_insn "*avx_cvtsi2sdq"
2772 [(set (match_operand:V2DF 0 "register_operand" "=x")
2775 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2776 (match_operand:V2DF 1 "register_operand" "x")
2778 "TARGET_AVX && TARGET_64BIT"
2779 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2780 [(set_attr "type" "sseicvt")
2781 (set_attr "length_vex" "4")
2782 (set_attr "prefix" "vex")
2783 (set_attr "mode" "DF")])
;; cvtsi2sdq for TARGET_SSE2 && TARGET_64BIT: DImode operand 2 converted
;; to DF, operand 1 tied to the destination, REX prefix always counted.
2785 (define_insn "sse2_cvtsi2sdq"
2786 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2789 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2790 (match_operand:V2DF 1 "register_operand" "0,0")
2792 "TARGET_SSE2 && TARGET_64BIT"
2793 "cvtsi2sdq\t{%2, %0|%0, %2}"
2794 [(set_attr "type" "sseicvt")
2795 (set_attr "prefix_rex" "1")
2796 (set_attr "mode" "DF")
2797 (set_attr "athlon_decode" "double,direct")
2798 (set_attr "amdfam10_decode" "vector,double")])
;; cvtsd2si on element 0 of V2DF operand 1, result in an SImode general
;; register, expressed with UNSPEC_FIX_NOTRUNC.
2800 (define_insn "sse2_cvtsd2si"
2801 [(set (match_operand:SI 0 "register_operand" "=r,r")
2804 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2805 (parallel [(const_int 0)]))]
2806 UNSPEC_FIX_NOTRUNC))]
2808 "%vcvtsd2si\t{%1, %0|%0, %1}"
2809 [(set_attr "type" "sseicvt")
2810 (set_attr "athlon_decode" "double,vector")
2811 (set_attr "prefix_rep" "1")
2812 (set_attr "prefix" "maybe_vex")
2813 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si whose source is a plain DFmode operand
;; (register or memory) rather than a vector element.
2815 (define_insn "sse2_cvtsd2si_2"
2816 [(set (match_operand:SI 0 "register_operand" "=r,r")
2817 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2818 UNSPEC_FIX_NOTRUNC))]
2820 "%vcvtsd2si\t{%1, %0|%0, %1}"
2821 [(set_attr "type" "sseicvt")
2822 (set_attr "athlon_decode" "double,vector")
2823 (set_attr "amdfam10_decode" "double,double")
2824 (set_attr "prefix_rep" "1")
2825 (set_attr "prefix" "maybe_vex")
2826 (set_attr "mode" "SI")])
;; DImode-result cvtsd2si on element 0 of V2DF operand 1; needs
;; TARGET_SSE2 && TARGET_64BIT.  Here the "q" suffix is written directly
;; in the mnemonic rather than as "{q}".
2828 (define_insn "sse2_cvtsd2siq"
2829 [(set (match_operand:DI 0 "register_operand" "=r,r")
2832 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2833 (parallel [(const_int 0)]))]
2834 UNSPEC_FIX_NOTRUNC))]
2835 "TARGET_SSE2 && TARGET_64BIT"
2836 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2837 [(set_attr "type" "sseicvt")
2838 (set_attr "athlon_decode" "double,vector")
2839 (set_attr "prefix_rep" "1")
2840 (set_attr "prefix" "maybe_vex")
2841 (set_attr "mode" "DI")])
;; DImode-result cvtsd2si taking a plain DFmode source through
;; UNSPEC_FIX_NOTRUNC; needs TARGET_SSE2 && TARGET_64BIT.
2843 (define_insn "sse2_cvtsd2siq_2"
2844 [(set (match_operand:DI 0 "register_operand" "=r,r")
2845 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2846 UNSPEC_FIX_NOTRUNC))]
2847 "TARGET_SSE2 && TARGET_64BIT"
2848 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2849 [(set_attr "type" "sseicvt")
2850 (set_attr "athlon_decode" "double,vector")
2851 (set_attr "amdfam10_decode" "double,double")
2852 (set_attr "prefix_rep" "1")
2853 (set_attr "prefix" "maybe_vex")
2854 (set_attr "mode" "DI")])
;; cvttsd2si on element 0 of V2DF operand 1, result in an SImode general
;; register.
2856 (define_insn "sse2_cvttsd2si"
2857 [(set (match_operand:SI 0 "register_operand" "=r,r")
2860 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2861 (parallel [(const_int 0)]))))]
2863 "%vcvttsd2si\t{%1, %0|%0, %1}"
2864 [(set_attr "type" "sseicvt")
2865 (set_attr "prefix_rep" "1")
2866 (set_attr "prefix" "maybe_vex")
2867 (set_attr "mode" "SI")
2868 (set_attr "athlon_decode" "double,vector")
2869 (set_attr "amdfam10_decode" "double,double")])
;; DImode-result cvttsd2si on element 0 of V2DF operand 1; needs
;; TARGET_SSE2 && TARGET_64BIT.
2871 (define_insn "sse2_cvttsd2siq"
2872 [(set (match_operand:DI 0 "register_operand" "=r,r")
2875 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2876 (parallel [(const_int 0)]))))]
2877 "TARGET_SSE2 && TARGET_64BIT"
2878 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2879 [(set_attr "type" "sseicvt")
2880 (set_attr "prefix_rep" "1")
2881 (set_attr "prefix" "maybe_vex")
2882 (set_attr "mode" "DI")
2883 (set_attr "athlon_decode" "double,vector")
2884 (set_attr "amdfam10_decode" "double,double")])
;; vcvtdq2pd, 256-bit result: RTL "float" of all four elements of the V4SI
;; operand 1 into a V4DF register.
2886 (define_insn "avx_cvtdq2pd256"
2887 [(set (match_operand:V4DF 0 "register_operand" "=x")
2888 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2890 "vcvtdq2pd\t{%1, %0|%0, %1}"
2891 [(set_attr "type" "ssecvt")
2892 (set_attr "prefix" "vex")
2893 (set_attr "mode" "V4DF")])
;; cvtdq2pd, 128-bit result: only elements 0 and 1 of the V4SI operand 1
;; are used to produce the V2DF destination.
2895 (define_insn "sse2_cvtdq2pd"
2896 [(set (match_operand:V2DF 0 "register_operand" "=x")
2899 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2900 (parallel [(const_int 0) (const_int 1)]))))]
2902 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2903 [(set_attr "type" "ssecvt")
2904 (set_attr "prefix" "maybe_vex")
2905 (set_attr "mode" "V2DF")])
;; vcvtpd2dq with a 256-bit source: V4DF operand 1 to V4SI through
;; UNSPEC_FIX_NOTRUNC.  The "{y}" in the template is a dialect-specific
;; size suffix.
2907 (define_insn "avx_cvtpd2dq256"
2908 [(set (match_operand:V4SI 0 "register_operand" "=x")
2909 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2910 UNSPEC_FIX_NOTRUNC))]
2912 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2913 [(set_attr "type" "ssecvt")
2914 (set_attr "prefix" "vex")
2915 (set_attr "mode" "OI")])
;; Expander for the 128-bit cvtpd2dq.  The preparation statement supplies
;; operand 2 as the V2SI zero constant, which the matching
;; "*sse2_cvtpd2dq" insn requires via const0_operand.
2917 (define_expand "sse2_cvtpd2dq"
2918 [(set (match_operand:V4SI 0 "register_operand" "")
2920 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2924 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for the 128-bit cvtpd2dq: a V2SI unspec of V2DF operand 1
;; combined with operand 2, which must be a V2SI zero constant.  The C
;; output statement picks "vcvtpd2dq{x}" when TARGET_AVX is set and the
;; legacy "cvtpd2dq" otherwise.
2926 (define_insn "*sse2_cvtpd2dq"
2927 [(set (match_operand:V4SI 0 "register_operand" "=x")
2929 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2931 (match_operand:V2SI 2 "const0_operand" "")))]
2933 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2934 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2935 [(set_attr "type" "ssecvt")
2936 (set_attr "prefix_rep" "1")
2937 (set_attr "prefix_data16" "0")
2938 (set_attr "prefix" "maybe_vex")
2939 (set_attr "mode" "TI")
2940 (set_attr "amdfam10_decode" "double")])
;; vcvttpd2dq with a 256-bit source: RTL "fix" of V4DF operand 1 into a
;; V4SI register.
2942 (define_insn "avx_cvttpd2dq256"
2943 [(set (match_operand:V4SI 0 "register_operand" "=x")
2944 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2946 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2947 [(set_attr "type" "ssecvt")
2948 (set_attr "prefix" "vex")
2949 (set_attr "mode" "OI")])
;; Expander for the 128-bit cvttpd2dq.  The preparation statement supplies
;; operand 2 as the V2SI zero constant expected by "*sse2_cvttpd2dq".
2951 (define_expand "sse2_cvttpd2dq"
2952 [(set (match_operand:V4SI 0 "register_operand" "")
2954 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2957 "operands[2] = CONST0_RTX (V2SImode);")
;; Matcher for the 128-bit cvttpd2dq: RTL "fix" of V2DF operand 1 to V2SI
;; combined with a V2SI zero constant (operand 2).  Emits "vcvttpd2dq{x}"
;; under TARGET_AVX and "cvttpd2dq" otherwise.
2959 (define_insn "*sse2_cvttpd2dq"
2960 [(set (match_operand:V4SI 0 "register_operand" "=x")
2962 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2963 (match_operand:V2SI 2 "const0_operand" "")))]
2965 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2966 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2967 [(set_attr "type" "ssecvt")
2968 (set_attr "prefix" "maybe_vex")
2969 (set_attr "mode" "TI")
2970 (set_attr "amdfam10_decode" "double")])
;; Three-operand VEX form of cvtsd2ss: float_truncate of V2DF operand 2
;; to V2SF, with V4SF operand 1 as a separate source register.
2972 (define_insn "*avx_cvtsd2ss"
2973 [(set (match_operand:V4SF 0 "register_operand" "=x")
2976 (float_truncate:V2SF
2977 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2978 (match_operand:V4SF 1 "register_operand" "x")
2981 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2982 [(set_attr "type" "ssecvt")
2983 (set_attr "prefix" "vex")
2984 (set_attr "mode" "SF")])
;; Two-operand cvtsd2ss: float_truncate of V2DF operand 2, with operand 1
;; tied to the destination.  Register and memory sources are separate
;; alternatives carrying different decode attributes.
2986 (define_insn "sse2_cvtsd2ss"
2987 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2990 (float_truncate:V2SF
2991 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2992 (match_operand:V4SF 1 "register_operand" "0,0")
2995 "cvtsd2ss\t{%2, %0|%0, %2}"
2996 [(set_attr "type" "ssecvt")
2997 (set_attr "athlon_decode" "vector,double")
2998 (set_attr "amdfam10_decode" "vector,double")
2999 (set_attr "mode" "SF")])
;; Three-operand VEX form of cvtss2sd: elements 0 and 1 of V4SF operand 2
;; feed the conversion; V2DF operand 1 is a separate source register.
3001 (define_insn "*avx_cvtss2sd"
3002 [(set (match_operand:V2DF 0 "register_operand" "=x")
3006 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
3007 (parallel [(const_int 0) (const_int 1)])))
3008 (match_operand:V2DF 1 "register_operand" "x")
3011 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
3012 [(set_attr "type" "ssecvt")
3013 (set_attr "prefix" "vex")
3014 (set_attr "mode" "DF")])
;; Two-operand cvtss2sd: elements 0 and 1 of V4SF operand 2 feed the
;; conversion; operand 1 is tied to the destination register.
3016 (define_insn "sse2_cvtss2sd"
3017 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3021 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
3022 (parallel [(const_int 0) (const_int 1)])))
3023 (match_operand:V2DF 1 "register_operand" "0,0")
3026 "cvtss2sd\t{%2, %0|%0, %2}"
3027 [(set_attr "type" "ssecvt")
3028 (set_attr "amdfam10_decode" "vector,double")
3029 (set_attr "mode" "DF")])
;; vcvtpd2ps with a 256-bit source: float_truncate of V4DF operand 1 into
;; a V4SF register.
3031 (define_insn "avx_cvtpd2ps256"
3032 [(set (match_operand:V4SF 0 "register_operand" "=x")
3033 (float_truncate:V4SF
3034 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3036 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
3037 [(set_attr "type" "ssecvt")
3038 (set_attr "prefix" "vex")
3039 (set_attr "mode" "V4SF")])
;; Expander for the 128-bit cvtpd2ps.  The preparation statement supplies
;; operand 2 as the V2SF zero constant expected by "*sse2_cvtpd2ps".
3041 (define_expand "sse2_cvtpd2ps"
3042 [(set (match_operand:V4SF 0 "register_operand" "")
3044 (float_truncate:V2SF
3045 (match_operand:V2DF 1 "nonimmediate_operand" ""))
3048 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matcher for the 128-bit cvtpd2ps: float_truncate of V2DF operand 1 to
;; V2SF combined with a V2SF zero constant (operand 2).  Emits
;; "vcvtpd2ps{x}" under TARGET_AVX and "cvtpd2ps" otherwise.
3050 (define_insn "*sse2_cvtpd2ps"
3051 [(set (match_operand:V4SF 0 "register_operand" "=x")
3053 (float_truncate:V2SF
3054 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3055 (match_operand:V2SF 2 "const0_operand" "")))]
3057 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
3058 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
3059 [(set_attr "type" "ssecvt")
3060 (set_attr "prefix_data16" "1")
3061 (set_attr "prefix" "maybe_vex")
3062 (set_attr "mode" "V4SF")
3063 (set_attr "amdfam10_decode" "double")])
;; Produces V4DF operand 0 from V4SF operand 1 (register or memory),
;; emitted as vcvtps2pd.
3065 (define_insn "avx_cvtps2pd256"
3066 [(set (match_operand:V4DF 0 "register_operand" "=x")
3068 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3070 "vcvtps2pd\t{%1, %0|%0, %1}"
3071 [(set_attr "type" "ssecvt")
3072 (set_attr "prefix" "vex")
3073 (set_attr "mode" "V4DF")])
;; Produces V2DF operand 0 from elements 0 and 1 of V4SF operand 1.  The
;; template carries the %v prefix marker and the insn is tagged
;; "maybe_vex", so one pattern serves both encodings.
3075 (define_insn "sse2_cvtps2pd"
3076 [(set (match_operand:V2DF 0 "register_operand" "=x")
3079 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3080 (parallel [(const_int 0) (const_int 1)]))))]
3082 "%vcvtps2pd\t{%1, %0|%0, %1}"
3083 [(set_attr "type" "ssecvt")
3084 (set_attr "prefix" "maybe_vex")
3085 (set_attr "mode" "V2DF")
3086 (set_attr "prefix_data16" "0")
3087 (set_attr "amdfam10_decode" "direct")])
;; Two-set expander.  The first set rearranges V4SF operand 1 (selection
;; list starting at index 6); the second produces V2DF operand 0 from
;; elements 0 and 1.  The preparation statement creates a fresh V4SF pseudo
;; as operands[2] -- presumably the intermediate between the two sets.
3089 (define_expand "vec_unpacks_hi_v4sf"
3094 (match_operand:V4SF 1 "nonimmediate_operand" ""))
3095 (parallel [(const_int 6)
3099 (set (match_operand:V2DF 0 "register_operand" "")
3103 (parallel [(const_int 0) (const_int 1)]))))]
3105 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Single-set expander: V2DF operand 0 is produced from elements 0 and 1
;; of V4SF operand 1, which may be a register or memory.
3107 (define_expand "vec_unpacks_lo_v4sf"
3108 [(set (match_operand:V2DF 0 "register_operand" "")
3111 (match_operand:V4SF 1 "nonimmediate_operand" "")
3112 (parallel [(const_int 0) (const_int 1)]))))]
;; V8HI -> V4SF in two steps: gen_vec_unpacks_hi_v8hi fills a V4SI
;; temporary from operand 1, then gen_sse2_cvtdq2ps converts that
;; temporary into V4SF operand 0.
3115 (define_expand "vec_unpacks_float_hi_v8hi"
3116 [(match_operand:V4SF 0 "register_operand" "")
3117 (match_operand:V8HI 1 "register_operand" "")]
3120 rtx tmp = gen_reg_rtx (V4SImode);
3122 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
3123 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two steps: gen_vec_unpacks_lo_v8hi fills a V4SI
;; temporary from operand 1, then gen_sse2_cvtdq2ps converts that
;; temporary into V4SF operand 0.
3127 (define_expand "vec_unpacks_float_lo_v8hi"
3128 [(match_operand:V4SF 0 "register_operand" "")
3129 (match_operand:V8HI 1 "register_operand" "")]
3132 rtx tmp = gen_reg_rtx (V4SImode);
3134 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
3135 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two steps: gen_vec_unpacku_hi_v8hi fills a V4SI
;; temporary from operand 1, then gen_sse2_cvtdq2ps converts that
;; temporary into V4SF operand 0.
3139 (define_expand "vec_unpacku_float_hi_v8hi"
3140 [(match_operand:V4SF 0 "register_operand" "")
3141 (match_operand:V8HI 1 "register_operand" "")]
3144 rtx tmp = gen_reg_rtx (V4SImode);
3146 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
3147 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF in two steps: gen_vec_unpacku_lo_v8hi fills a V4SI
;; temporary from operand 1, then gen_sse2_cvtdq2ps converts that
;; temporary into V4SF operand 0.
3151 (define_expand "vec_unpacku_float_lo_v8hi"
3152 [(match_operand:V4SF 0 "register_operand" "")
3153 (match_operand:V8HI 1 "register_operand" "")]
3156 rtx tmp = gen_reg_rtx (V4SImode);
3158 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
3159 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-set expander.  The first set rearranges V4SI operand 1 (selection
;; list starting at index 2); the second produces V2DF operand 0 from
;; elements 0 and 1.  The preparation statement creates a fresh V4SI pseudo
;; as operands[2] -- presumably the intermediate between the two sets.
3163 (define_expand "vec_unpacks_float_hi_v4si"
3166 (match_operand:V4SI 1 "nonimmediate_operand" "")
3167 (parallel [(const_int 2)
3171 (set (match_operand:V2DF 0 "register_operand" "")
3175 (parallel [(const_int 0) (const_int 1)]))))]
3177 "operands[2] = gen_reg_rtx (V4SImode);")
;; Single-set expander: V2DF operand 0 is produced from elements 0 and 1
;; of V4SI operand 1, which may be a register or memory.
3179 (define_expand "vec_unpacks_float_lo_v4si"
3180 [(set (match_operand:V2DF 0 "register_operand" "")
3183 (match_operand:V4SI 1 "nonimmediate_operand" "")
3184 (parallel [(const_int 0) (const_int 1)]))))]
;; Multi-set expander producing V2DF operand 0 from V4SI operand 1
;; (selection list starting at index 2).  Visible arithmetic: operand 6 is
;; compared with lt against operand 3 (a zero vector), operand 7 is ANDed
;; with operand 4 (a vector built from 2**32), and operand 0 is operand 6
;; plus operand 8.  Operands 7 and 8 are presumably the results of the lt
;; and the and respectively -- their destinations are not visible here.
3187 (define_expand "vec_unpacku_float_hi_v4si"
3190 (match_operand:V4SI 1 "nonimmediate_operand" "")
3191 (parallel [(const_int 2)
3199 (parallel [(const_int 0) (const_int 1)]))))
3201 (lt:V2DF (match_dup 6) (match_dup 3)))
3203 (and:V2DF (match_dup 7) (match_dup 4)))
3204 (set (match_operand:V2DF 0 "register_operand" "")
3205 (plus:V2DF (match_dup 6) (match_dup 8)))]
3208 REAL_VALUE_TYPE TWO32r;
;; TWO32r = 1.0 scaled by 2**32, then wrapped as a DFmode constant.
3212 real_ldexp (&TWO32r, &dconst1, 32);
3213 x = const_double_from_real_value (TWO32r, DFmode);
;; Both constant vectors are forced into registers.
3215 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3216 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3218 operands[5] = gen_reg_rtx (V4SImode);
;; Operands 6, 7 and 8 are fresh V2DF pseudos.
3220 for (i = 6; i < 9; i++)
3221 operands[i] = gen_reg_rtx (V2DFmode);
;; Multi-set expander producing V2DF operand 0 from elements 0 and 1 of
;; V4SI operand 1.  Visible arithmetic: operand 5 is compared with lt
;; against operand 3 (a zero vector), operand 6 is ANDed with operand 4
;; (a vector built from 2**32), and operand 0 is operand 5 plus operand 7.
;; Operands 6 and 7 are presumably the results of the lt and the and
;; respectively -- their destinations are not visible here.
3224 (define_expand "vec_unpacku_float_lo_v4si"
3228 (match_operand:V4SI 1 "nonimmediate_operand" "")
3229 (parallel [(const_int 0) (const_int 1)]))))
3231 (lt:V2DF (match_dup 5) (match_dup 3)))
3233 (and:V2DF (match_dup 6) (match_dup 4)))
3234 (set (match_operand:V2DF 0 "register_operand" "")
3235 (plus:V2DF (match_dup 5) (match_dup 7)))]
3238 REAL_VALUE_TYPE TWO32r;
;; TWO32r = 1.0 scaled by 2**32, then wrapped as a DFmode constant.
3242 real_ldexp (&TWO32r, &dconst1, 32);
3243 x = const_double_from_real_value (TWO32r, DFmode);
;; Both constant vectors are forced into registers.
3245 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3246 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
;; Operands 5, 6 and 7 are fresh V2DF pseudos.
3248 for (i = 5; i < 8; i++)
3249 operands[i] = gen_reg_rtx (V2DFmode);
;; Packs two V2DF inputs into V4SF operand 0: each input is converted into
;; its own V4SF temporary with gen_sse2_cvtpd2ps, and the two temporaries
;; are combined with gen_sse_movlhps.
3252 (define_expand "vec_pack_trunc_v2df"
3253 [(match_operand:V4SF 0 "register_operand" "")
3254 (match_operand:V2DF 1 "nonimmediate_operand" "")
3255 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3260 r1 = gen_reg_rtx (V4SFmode);
3261 r2 = gen_reg_rtx (V4SFmode);
3263 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3264 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3265 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Packs two V2DF inputs into V4SI operand 0: each input is converted into
;; its own V4SI temporary with gen_sse2_cvttpd2dq, and the temporaries are
;; combined with gen_vec_interleave_lowv2di.
3269 (define_expand "vec_pack_sfix_trunc_v2df"
3270 [(match_operand:V4SI 0 "register_operand" "")
3271 (match_operand:V2DF 1 "nonimmediate_operand" "")
3272 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3277 r1 = gen_reg_rtx (V4SImode);
3278 r2 = gen_reg_rtx (V4SImode);
3280 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3281 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
;; The interleave is done on V2DI views of the destination and both temps.
3282 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3283 gen_lowpart (V2DImode, r1),
3284 gen_lowpart (V2DImode, r2)));
;; Packs two V2DF inputs into V4SI operand 0: each input is converted into
;; its own V4SI temporary with gen_sse2_cvtpd2dq, and the temporaries are
;; combined with gen_vec_interleave_lowv2di.
3288 (define_expand "vec_pack_sfix_v2df"
3289 [(match_operand:V4SI 0 "register_operand" "")
3290 (match_operand:V2DF 1 "nonimmediate_operand" "")
3291 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3296 r1 = gen_reg_rtx (V4SImode);
3297 r2 = gen_reg_rtx (V4SImode);
3299 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3300 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
;; The interleave is done on V2DI views of the destination and both temps.
3301 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3302 gen_lowpart (V2DImode, r1),
3303 gen_lowpart (V2DImode, r2)));
3307 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3309 ;; Parallel single-precision floating point element swizzling
3311 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander over three V4SF nonimmediate operands (selection list starting
;; at index 6).  The preparation statement hands the operand array to
;; ix86_fixup_binary_operands with code UNKNOWN and mode V4SFmode.
3313 (define_expand "sse_movhlps_exp"
3314 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3317 (match_operand:V4SF 1 "nonimmediate_operand" "")
3318 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3319 (parallel [(const_int 6)
3324 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX form; the condition rejects operands 1 and 2 both being memory.
;; Alternatives:
;;   0: all registers            -> vmovhlps
;;   1: operand 2 in memory ("o") -> vmovlps reading %H2
;;   2: memory destination with operand 1 tied to it -> vmovhps store
3326 (define_insn "*avx_movhlps"
3327 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3330 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3331 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3332 (parallel [(const_int 6)
3336 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3338 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3339 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3340 vmovhps\t{%2, %0|%0, %2}"
3341 [(set_attr "type" "ssemov")
3342 (set_attr "prefix" "vex")
3343 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; SSE form; operand 1 is tied to the destination in every alternative and
;; the condition rejects operands 1 and 2 both being memory.
;; Alternatives:
;;   0: register source          -> movhlps
;;   1: operand 2 in memory ("o") -> movlps reading %H2
;;   2: memory destination        -> movhps store
3345 (define_insn "sse_movhlps"
3346 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3349 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3350 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3351 (parallel [(const_int 6)
3355 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3357 movhlps\t{%2, %0|%0, %2}
3358 movlps\t{%H2, %0|%0, %H2}
3359 movhps\t{%2, %0|%0, %2}"
3360 [(set_attr "type" "ssemov")
3361 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Expander over three V4SF nonimmediate operands (selection list starting
;; at index 0).  The preparation statement hands the operand array to
;; ix86_fixup_binary_operands with code UNKNOWN and mode V4SFmode.
3363 (define_expand "sse_movlhps_exp"
3364 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3367 (match_operand:V4SF 1 "nonimmediate_operand" "")
3368 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3369 (parallel [(const_int 0)
3374 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX form, gated on ix86_binary_operator_ok for the operand set.
;; Alternatives:
;;   0: all registers                -> vmovlhps
;;   1: operand 2 in memory          -> vmovhps load
;;   2: memory destination ("o") with operand 1 tied to it
;;                                   -> vmovlps store addressed via %H0
3376 (define_insn "*avx_movlhps"
3377 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3380 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3381 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3382 (parallel [(const_int 0)
3386 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3388 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3389 vmovhps\t{%2, %1, %0|%0, %1, %2}
3390 vmovlps\t{%2, %H0|%H0, %2}"
3391 [(set_attr "type" "ssemov")
3392 (set_attr "prefix" "vex")
3393 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; SSE form, gated on ix86_binary_operator_ok; operand 1 is tied to the
;; destination in every alternative.
;; Alternatives:
;;   0: register source       -> movlhps
;;   1: operand 2 in memory   -> movhps load
;;   2: memory destination    -> movlps store addressed via %H0
3395 (define_insn "sse_movlhps"
3396 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3399 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3400 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3401 (parallel [(const_int 0)
3405 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3407 movlhps\t{%2, %0|%0, %2}
3408 movhps\t{%2, %0|%0, %2}
3409 movlps\t{%2, %H0|%H0, %2}"
3410 [(set_attr "type" "ssemov")
3411 (set_attr "mode" "V4SF,V2SF,V2SF")])
3413 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; V8SF vunpckhps.  The selection list pairs index k with index k+8 for
;; k = 2, 3, 6, 7; indices 8..15 presumably address operand 2 (the
;; enclosing combine of operands 1 and 2 is not visible in this excerpt).
3414 (define_insn "avx_unpckhps256"
3415 [(set (match_operand:V8SF 0 "register_operand" "=x")
3418 (match_operand:V8SF 1 "register_operand" "x")
3419 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3420 (parallel [(const_int 2) (const_int 10)
3421 (const_int 3) (const_int 11)
3422 (const_int 6) (const_int 14)
3423 (const_int 7) (const_int 15)])))]
3425 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3426 [(set_attr "type" "sselog")
3427 (set_attr "prefix" "vex")
3428 (set_attr "mode" "V8SF")])
;; V4SF three-operand vunpckhps.  The selection list is 2, 6, 3, 7;
;; indices 4..7 presumably address operand 2 (the enclosing combine of
;; operands 1 and 2 is not visible in this excerpt).
3430 (define_insn "*avx_interleave_highv4sf"
3431 [(set (match_operand:V4SF 0 "register_operand" "=x")
3434 (match_operand:V4SF 1 "register_operand" "x")
3435 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3436 (parallel [(const_int 2) (const_int 6)
3437 (const_int 3) (const_int 7)])))]
3439 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3440 [(set_attr "type" "sselog")
3441 (set_attr "prefix" "vex")
3442 (set_attr "mode" "V4SF")])
;; Two-operand unpckhps with the same 2, 6, 3, 7 selection list as the AVX
;; variant; operand 1 is tied to the destination by the "0" constraint.
3444 (define_insn "vec_interleave_highv4sf"
3445 [(set (match_operand:V4SF 0 "register_operand" "=x")
3448 (match_operand:V4SF 1 "register_operand" "0")
3449 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3450 (parallel [(const_int 2) (const_int 6)
3451 (const_int 3) (const_int 7)])))]
3453 "unpckhps\t{%2, %0|%0, %2}"
3454 [(set_attr "type" "sselog")
3455 (set_attr "mode" "V4SF")])
3457 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; V8SF vunpcklps.  The selection list pairs index k with index k+8 for
;; k = 0, 1, 4, 5; indices 8..15 presumably address operand 2 (the
;; enclosing combine of operands 1 and 2 is not visible in this excerpt).
3458 (define_insn "avx_unpcklps256"
3459 [(set (match_operand:V8SF 0 "register_operand" "=x")
3462 (match_operand:V8SF 1 "register_operand" "x")
3463 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3464 (parallel [(const_int 0) (const_int 8)
3465 (const_int 1) (const_int 9)
3466 (const_int 4) (const_int 12)
3467 (const_int 5) (const_int 13)])))]
3469 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3470 [(set_attr "type" "sselog")
3471 (set_attr "prefix" "vex")
3472 (set_attr "mode" "V8SF")])
;; V4SF three-operand vunpcklps.  The selection list is 0, 4, 1, 5;
;; indices 4..7 presumably address operand 2 (the enclosing combine of
;; operands 1 and 2 is not visible in this excerpt).
3474 (define_insn "*avx_interleave_lowv4sf"
3475 [(set (match_operand:V4SF 0 "register_operand" "=x")
3478 (match_operand:V4SF 1 "register_operand" "x")
3479 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3480 (parallel [(const_int 0) (const_int 4)
3481 (const_int 1) (const_int 5)])))]
3483 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3484 [(set_attr "type" "sselog")
3485 (set_attr "prefix" "vex")
3486 (set_attr "mode" "V4SF")])
;; Two-operand unpcklps with the same 0, 4, 1, 5 selection list as the AVX
;; variant; operand 1 is tied to the destination by the "0" constraint.
3488 (define_insn "vec_interleave_lowv4sf"
3489 [(set (match_operand:V4SF 0 "register_operand" "=x")
3492 (match_operand:V4SF 1 "register_operand" "0")
3493 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3494 (parallel [(const_int 0) (const_int 4)
3495 (const_int 1) (const_int 5)])))]
3497 "unpcklps\t{%2, %0|%0, %2}"
3498 [(set_attr "type" "sselog")
3499 (set_attr "mode" "V4SF")])
3501 ;; These are modeled with the same vec_concat as the others so that we
3502 ;; capture users of shufps that can use the new instructions
;; V8SF vmovshdup: the selection list repeats each odd index twice
;; (1,1,3,3,5,5,7,7) over operand 1, which may be a register or memory.
3503 (define_insn "avx_movshdup256"
3504 [(set (match_operand:V8SF 0 "register_operand" "=x")
3507 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3509 (parallel [(const_int 1) (const_int 1)
3510 (const_int 3) (const_int 3)
3511 (const_int 5) (const_int 5)
3512 (const_int 7) (const_int 7)])))]
3514 "vmovshdup\t{%1, %0|%0, %1}"
3515 [(set_attr "type" "sse")
3516 (set_attr "prefix" "vex")
3517 (set_attr "mode" "V8SF")])
;; V4SF movshdup over operand 1 (register or memory); the selection list
;; starts at index 1.  The %v template marker and "maybe_vex" attribute let
;; the one pattern cover both encodings.
3519 (define_insn "sse3_movshdup"
3520 [(set (match_operand:V4SF 0 "register_operand" "=x")
3523 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3525 (parallel [(const_int 1)
3530 "%vmovshdup\t{%1, %0|%0, %1}"
3531 [(set_attr "type" "sse")
3532 (set_attr "prefix_rep" "1")
3533 (set_attr "prefix" "maybe_vex")
3534 (set_attr "mode" "V4SF")])
;; V8SF vmovsldup: the selection list repeats each even index twice
;; (0,0,2,2,4,4,6,6) over operand 1, which may be a register or memory.
3536 (define_insn "avx_movsldup256"
3537 [(set (match_operand:V8SF 0 "register_operand" "=x")
3540 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3542 (parallel [(const_int 0) (const_int 0)
3543 (const_int 2) (const_int 2)
3544 (const_int 4) (const_int 4)
3545 (const_int 6) (const_int 6)])))]
3547 "vmovsldup\t{%1, %0|%0, %1}"
3548 [(set_attr "type" "sse")
3549 (set_attr "prefix" "vex")
3550 (set_attr "mode" "V8SF")])
;; V4SF movsldup over operand 1 (register or memory); the selection list
;; starts at index 0.  The %v template marker and "maybe_vex" attribute let
;; the one pattern cover both encodings.
3552 (define_insn "sse3_movsldup"
3553 [(set (match_operand:V4SF 0 "register_operand" "=x")
3556 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3558 (parallel [(const_int 0)
3563 "%vmovsldup\t{%1, %0|%0, %1}"
3564 [(set_attr "type" "sse")
3565 (set_attr "prefix_rep" "1")
3566 (set_attr "prefix" "maybe_vex")
3567 (set_attr "mode" "V4SF")])
;; Expands an 8-bit shuffle immediate (operand 3) into the eight explicit
;; element selectors taken by gen_avx_shufps256_1.  Each 2-bit field of the
;; mask is emitted twice: once as-is for selectors 0..3 and once with 4
;; added for selectors 4..7.  The fields at bits 4 and 6 additionally get
;; +8, placing them in the 8..15 index range.
3569 (define_expand "avx_shufps256"
3570 [(match_operand:V8SF 0 "register_operand" "")
3571 (match_operand:V8SF 1 "register_operand" "")
3572 (match_operand:V8SF 2 "nonimmediate_operand" "")
3573 (match_operand:SI 3 "const_int_operand" "")]
3576 int mask = INTVAL (operands[3]);
3577 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3578 GEN_INT ((mask >> 0) & 3),
3579 GEN_INT ((mask >> 2) & 3),
3580 GEN_INT (((mask >> 4) & 3) + 8),
3581 GEN_INT (((mask >> 6) & 3) + 8),
3582 GEN_INT (((mask >> 0) & 3) + 4),
3583 GEN_INT (((mask >> 2) & 3) + 4),
3584 GEN_INT (((mask >> 4) & 3) + 12),
3585 GEN_INT (((mask >> 6) & 3) + 12)));
3589 ;; One bit in mask selects 2 elements.
;; V8SF vshufps with eight explicit selectors (operands 3..10), each
;; range-checked by its predicate.  The insn condition additionally
;; requires selectors 7..10 to equal selectors 3..6 plus 4, so a single
;; 8-bit immediate can describe the whole shuffle.
3590 (define_insn "avx_shufps256_1"
3591 [(set (match_operand:V8SF 0 "register_operand" "=x")
3594 (match_operand:V8SF 1 "register_operand" "x")
3595 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3596 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3597 (match_operand 4 "const_0_to_3_operand" "")
3598 (match_operand 5 "const_8_to_11_operand" "")
3599 (match_operand 6 "const_8_to_11_operand" "")
3600 (match_operand 7 "const_4_to_7_operand" "")
3601 (match_operand 8 "const_4_to_7_operand" "")
3602 (match_operand 9 "const_12_to_15_operand" "")
3603 (match_operand 10 "const_12_to_15_operand" "")])))]
3605 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3606 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3607 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3608 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
;; Rebuild the immediate from selectors 3..6 (2 bits each; operands 5 and 6
;; are rebased by 8) and overwrite operands[3] with it for the template.
3611 mask = INTVAL (operands[3]);
3612 mask |= INTVAL (operands[4]) << 2;
3613 mask |= (INTVAL (operands[5]) - 8) << 4;
3614 mask |= (INTVAL (operands[6]) - 8) << 6;
3615 operands[3] = GEN_INT (mask);
3617 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3619 [(set_attr "type" "sselog")
3620 (set_attr "length_immediate" "1")
3621 (set_attr "prefix" "vex")
3622 (set_attr "mode" "V8SF")])
;; Expands an 8-bit shuffle immediate (operand 3) into the four explicit
;; selectors taken by gen_sse_shufps_v4sf: the two low 2-bit fields are
;; passed as-is and the two high fields get +4.
3624 (define_expand "sse_shufps"
3625 [(match_operand:V4SF 0 "register_operand" "")
3626 (match_operand:V4SF 1 "register_operand" "")
3627 (match_operand:V4SF 2 "nonimmediate_operand" "")
3628 (match_operand:SI 3 "const_int_operand" "")]
3631 int mask = INTVAL (operands[3]);
3632 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3633 GEN_INT ((mask >> 0) & 3),
3634 GEN_INT ((mask >> 2) & 3),
3635 GEN_INT (((mask >> 4) & 3) + 4),
3636 GEN_INT (((mask >> 6) & 3) + 4)));
;; Three-operand vshufps over the SSEMODE4S iterator: a vec_select with
;; four explicit selectors out of the vec_concat of operands 1 and 2.
;; Selectors 3 and 4 are limited to 0..3, selectors 5 and 6 to 4..7.
3640 (define_insn "*avx_shufps_<mode>"
3641 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3642 (vec_select:SSEMODE4S
3643 (vec_concat:<ssedoublesizemode>
3644 (match_operand:SSEMODE4S 1 "register_operand" "x")
3645 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3646 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3647 (match_operand 4 "const_0_to_3_operand" "")
3648 (match_operand 5 "const_4_to_7_operand" "")
3649 (match_operand 6 "const_4_to_7_operand" "")])))]
;; Pack the four selectors into one immediate (2 bits each; operands 5 and
;; 6 are rebased by 4) and overwrite operands[3] with it for the template.
3653 mask |= INTVAL (operands[3]) << 0;
3654 mask |= INTVAL (operands[4]) << 2;
3655 mask |= (INTVAL (operands[5]) - 4) << 4;
3656 mask |= (INTVAL (operands[6]) - 4) << 6;
3657 operands[3] = GEN_INT (mask);
3659 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3661 [(set_attr "type" "sselog")
3662 (set_attr "length_immediate" "1")
3663 (set_attr "prefix" "vex")
3664 (set_attr "mode" "V4SF")])
;; Two-operand shufps over the SSEMODE4S iterator: same RTL shape as the
;; AVX variant, but operand 1 is tied to the destination ("0" constraint).
3666 (define_insn "sse_shufps_<mode>"
3667 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3668 (vec_select:SSEMODE4S
3669 (vec_concat:<ssedoublesizemode>
3670 (match_operand:SSEMODE4S 1 "register_operand" "0")
3671 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3672 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3673 (match_operand 4 "const_0_to_3_operand" "")
3674 (match_operand 5 "const_4_to_7_operand" "")
3675 (match_operand 6 "const_4_to_7_operand" "")])))]
;; Pack the four selectors into one immediate (2 bits each; operands 5 and
;; 6 are rebased by 4) and overwrite operands[3] with it for the template.
3679 mask |= INTVAL (operands[3]) << 0;
3680 mask |= INTVAL (operands[4]) << 2;
3681 mask |= (INTVAL (operands[5]) - 4) << 4;
3682 mask |= (INTVAL (operands[6]) - 4) << 6;
3683 operands[3] = GEN_INT (mask);
3685 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3687 [(set_attr "type" "sselog")
3688 (set_attr "length_immediate" "1")
3689 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Alternatives:
;;   0: memory destination           -> %vmovhps store
;;   1: register to register         -> %vmovhlps
;;   2: operand 1 in memory ("o")     -> %vmovlps load from %H1
3691 (define_insn "sse_storehps"
3692 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3694 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3695 (parallel [(const_int 2) (const_int 3)])))]
3698 %vmovhps\t{%1, %0|%0, %1}
3699 %vmovhlps\t{%1, %d0|%d0, %1}
3700 %vmovlps\t{%H1, %d0|%d0, %H1}"
3701 [(set_attr "type" "ssemov")
3702 (set_attr "prefix" "maybe_vex")
3703 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander combining elements 0 and 1 of V4SF operand 1 with V2SF
;; operand 2.  The preparation statement hands the operand array to
;; ix86_fixup_binary_operands with code UNKNOWN and mode V4SFmode.
3705 (define_expand "sse_loadhps_exp"
3706 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3709 (match_operand:V4SF 1 "nonimmediate_operand" "")
3710 (parallel [(const_int 0) (const_int 1)]))
3711 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3713 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX form: elements 0 and 1 of V4SF operand 1 combined with V2SF
;; operand 2.
;; Alternatives:
;;   0: operand 2 in memory              -> vmovhps load
;;   1: operand 2 in a register          -> vmovlhps
;;   2: memory destination ("o") with operand 1 tied to it
;;                                       -> vmovlps store addressed via %H0
3715 (define_insn "*avx_loadhps"
3716 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3719 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3720 (parallel [(const_int 0) (const_int 1)]))
3721 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3724 vmovhps\t{%2, %1, %0|%0, %1, %2}
3725 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3726 vmovlps\t{%2, %H0|%H0, %2}"
3727 [(set_attr "type" "ssemov")
3728 (set_attr "prefix" "vex")
3729 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE form of the pattern above it; operand 1 is tied to the destination
;; in every alternative.
;; Alternatives:
;;   0: operand 2 in memory     -> movhps load
;;   1: operand 2 in a register -> movlhps
;;   2: memory destination      -> movlps store addressed via %H0
3731 (define_insn "sse_loadhps"
3732 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3735 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3736 (parallel [(const_int 0) (const_int 1)]))
3737 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3740 movhps\t{%2, %0|%0, %2}
3741 movlhps\t{%2, %0|%0, %2}
3742 movlps\t{%2, %H0|%H0, %2}"
3743 [(set_attr "type" "ssemov")
3744 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX form: extracts elements 0 and 1 of V4SF operand 1 into V2SF
;; operand 0.
;; Alternatives:
;;   0: memory destination   -> vmovlps store
;;   1: register to register -> vmovaps (whole-register copy)
;;   2: operand 1 in memory  -> three-operand vmovlps load, with %0 also
;;                              used as the first source
;; NOTE(review): alternative 1 is tagged mode V2DF here while the SSE
;; counterpart tags its movaps alternative V4SF -- confirm which is
;; intended.
3746 (define_insn "*avx_storelps"
3747 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3749 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3750 (parallel [(const_int 0) (const_int 1)])))]
3753 vmovlps\t{%1, %0|%0, %1}
3754 vmovaps\t{%1, %0|%0, %1}
3755 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3756 [(set_attr "type" "ssemov")
3757 (set_attr "prefix" "vex")
3758 (set_attr "mode" "V2SF,V2DF,V2SF")])
;; SSE form: extracts elements 0 and 1 of V4SF operand 1 into V2SF
;; operand 0.
;; Alternatives:
;;   0: memory destination   -> movlps store
;;   1: register to register -> movaps (whole-register copy)
;;   2: operand 1 in memory  -> movlps load
3760 (define_insn "sse_storelps"
3761 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3763 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3764 (parallel [(const_int 0) (const_int 1)])))]
3767 movlps\t{%1, %0|%0, %1}
3768 movaps\t{%1, %0|%0, %1}
3769 movlps\t{%1, %0|%0, %1}"
3770 [(set_attr "type" "ssemov")
3771 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander combining V2SF operand 2 with elements 2 and 3 of V4SF
;; operand 1.  The preparation statement hands the operand array to
;; ix86_fixup_binary_operands with code UNKNOWN and mode V4SFmode.
3773 (define_expand "sse_loadlps_exp"
3774 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3776 (match_operand:V2SF 2 "nonimmediate_operand" "")
3778 (match_operand:V4SF 1 "nonimmediate_operand" "")
3779 (parallel [(const_int 2) (const_int 3)]))))]
3781 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
;; AVX form: V2SF operand 2 combined with elements 2 and 3 of V4SF
;; operand 1.
;; Alternatives:
;;   0: all registers          -> vshufps with immediate 0xe4
;;   1: operand 2 in memory    -> three-operand vmovlps load
;;   2: memory destination with operand 1 tied to it -> vmovlps store
;; Alternative 0 must use the v-prefixed mnemonic: the template is written
;; in three-register form and the insn is tagged prefix "vex", neither of
;; which the legacy "shufps" spelling can express.
3783 (define_insn "*avx_loadlps"
3784 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3786 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3788 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3789 (parallel [(const_int 2) (const_int 3)]))))]
3792 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3793 vmovlps\t{%2, %1, %0|%0, %1, %2}
3794 vmovlps\t{%2, %0|%0, %2}"
3795 [(set_attr "type" "sselog,ssemov,ssemov")
3796 (set_attr "length_immediate" "1,*,*")
3797 (set_attr "prefix" "vex")
3798 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; SSE form: V2SF operand 2 combined with elements 2 and 3 of V4SF
;; operand 1.
;; Alternatives:
;;   0: operand 2 tied to the destination, operand 1 in a register
;;                                   -> shufps with immediate 0xe4
;;   1: operand 2 in memory, operand 1 tied -> movlps load
;;   2: memory destination, operand 1 tied  -> movlps store
3800 (define_insn "sse_loadlps"
3801 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3803 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3805 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3806 (parallel [(const_int 2) (const_int 3)]))))]
3809 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3810 movlps\t{%2, %0|%0, %2}
3811 movlps\t{%2, %0|%0, %2}"
3812 [(set_attr "type" "sselog,ssemov,ssemov")
3813 (set_attr "length_immediate" "1,*,*")
3814 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-only three-operand vmovss: V4SF operands 1 and 2 are
;; independent register sources for V4SF operand 0.
3816 (define_insn "*avx_movss"
3817 [(set (match_operand:V4SF 0 "register_operand" "=x")
3819 (match_operand:V4SF 2 "register_operand" "x")
3820 (match_operand:V4SF 1 "register_operand" "x")
3823 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3824 [(set_attr "type" "ssemov")
3825 (set_attr "prefix" "vex")
3826 (set_attr "mode" "SF")])
;; Register-only two-operand movss: V4SF operand 1 is tied to the
;; destination ("0" constraint) and V4SF operand 2 is the register source.
3828 (define_insn "sse_movss"
3829 [(set (match_operand:V4SF 0 "register_operand" "=x")
3831 (match_operand:V4SF 2 "register_operand" "x")
3832 (match_operand:V4SF 1 "register_operand" "0")
3835 "movss\t{%2, %0|%0, %2}"
3836 [(set_attr "type" "ssemov")
3837 (set_attr "mode" "SF")])
;; Expander building V4SF operand 0 from scalar SF operand 1.
;; The preparation statement forces operand 1 into a register.  The mode
;; given to force_reg has to be the mode of the value being copied:
;; operand 1 is declared SF, so SFmode is used (V4SFmode would ask for a
;; register of a different mode than the operand it holds).
3839 (define_expand "vec_dupv4sf"
3840 [(set (match_operand:V4SF 0 "register_operand" "")
3842 (match_operand:SF 1 "nonimmediate_operand" "")))]
3846 operands[1] = force_reg (SFmode, operands[1]);
;; AVX insn building V4SF operand 0 from SF operand 1.
;; Alternatives:
;;   0: register source -> vshufps with immediate 0 and operand 1 used as
;;                         both sources
;;   1: memory source   -> vbroadcastss
3849 (define_insn "*vec_dupv4sf_avx"
3850 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3852 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3855 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3856 vbroadcastss\t{%1, %0|%0, %1}"
3857 [(set_attr "type" "sselog1,ssemov")
3858 (set_attr "length_immediate" "1,0")
3859 (set_attr "prefix_extra" "0,1")
3860 (set_attr "prefix" "vex")
3861 (set_attr "mode" "V4SF")])
;; SSE insn building V4SF operand 0 from SF operand 1, which must already
;; live in the destination register ("0" constraint); emitted as shufps
;; with immediate 0 and %0 as both source and destination.
3863 (define_insn "*vec_dupv4sf"
3864 [(set (match_operand:V4SF 0 "register_operand" "=x")
3866 (match_operand:SF 1 "register_operand" "0")))]
3868 "shufps\t{$0, %0, %0|%0, %0, 0}"
3869 [(set_attr "type" "sselog1")
3870 (set_attr "length_immediate" "1")
3871 (set_attr "mode" "V4SF")])
;; AVX insn forming V2SF operand 0 from SF operands 1 and 2.
;; Alternatives:
;;   0: both in x registers              -> vunpcklps
;;   1: operand 2 in memory              -> vinsertps with immediate 0x10
;;   2: operand 1 in memory, operand 2 constrained "C" -> vmovss load
;;   3: *y destination                   -> punpckldq
;;   4: *y destination, operand 2 "C"    -> movd
;; Alternatives 3 and 4 use non-VEX mnemonics, so the prefix attribute is
;; "orig" for them and "vex" for the rest.
3873 (define_insn "*vec_concatv2sf_avx"
3874 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3876 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3877 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3880 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3881 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3882 vmovss\t{%1, %0|%0, %1}
3883 punpckldq\t{%2, %0|%0, %2}
3884 movd\t{%1, %0|%0, %1}"
3885 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3886 (set_attr "length_immediate" "*,1,*,*,*")
3887 (set_attr "prefix_extra" "*,1,*,*,*")
3888 (set (attr "prefix")
3889 (if_then_else (eq_attr "alternative" "3,4")
3890 (const_string "orig")
3891 (const_string "vex")))
3892 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3894 ;; Although insertps takes register source, we prefer
3895 ;; unpcklps with register source since it is shorter.
;; Two-operand counterpart of the AVX pattern: operand 1 is tied to the
;; destination wherever it is a register.
;; Alternatives:
;;   0: operand 2 in an x register       -> unpcklps
;;   1: operand 2 in memory              -> insertps with immediate 0x10
;;   2: operand 1 in memory, operand 2 "C" -> movss load
;;   3: *y destination                   -> punpckldq
;;   4: *y destination, operand 2 "C"    -> movd
3896 (define_insn "*vec_concatv2sf_sse4_1"
3897 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3899 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3900 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3903 unpcklps\t{%2, %0|%0, %2}
3904 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3905 movss\t{%1, %0|%0, %1}
3906 punpckldq\t{%2, %0|%0, %2}
3907 movd\t{%1, %0|%0, %1}"
3908 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3909 (set_attr "prefix_data16" "*,1,*,*,*")
3910 (set_attr "prefix_extra" "*,1,*,*,*")
3911 (set_attr "length_immediate" "*,1,*,*,*")
3912 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3914 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3915 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3916 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Four-alternative variant without an insertps alternative; operand 2 is
;; restricted to reg_or_0_operand, so it is never a memory reference.
;; Alternatives:
;;   0: operand 1 tied, operand 2 in an x register -> unpcklps
;;   1: operand 1 in memory, operand 2 "C"         -> movss load
;;   2: *y registers                               -> punpckldq
;;   3: *y destination, operand 1 in memory        -> movd
3917 (define_insn "*vec_concatv2sf_sse"
3918 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3920 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3921 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3924 unpcklps\t{%2, %0|%0, %2}
3925 movss\t{%1, %0|%0, %1}
3926 punpckldq\t{%2, %0|%0, %2}
3927 movd\t{%1, %0|%0, %1}"
3928 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3929 (set_attr "mode" "V4SF,SF,DI,DI")])
;; AVX insn forming V4SF operand 0 from V2SF operands 1 and 2.
;; Alternatives:
;;   0: operand 2 in a register -> vmovlhps
;;   1: operand 2 in memory     -> vmovhps load
3931 (define_insn "*vec_concatv4sf_avx"
3932 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3934 (match_operand:V2SF 1 "register_operand" " x,x")
3935 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3938 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3939 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3940 [(set_attr "type" "ssemov")
3941 (set_attr "prefix" "vex")
3942 (set_attr "mode" "V4SF,V2SF")])
;; SSE insn forming V4SF operand 0 from V2SF operands 1 and 2; operand 1
;; is tied to the destination.
;; Alternatives:
;;   0: operand 2 in a register -> movlhps
;;   1: operand 2 in memory     -> movhps load
3944 (define_insn "*vec_concatv4sf_sse"
3945 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3947 (match_operand:V2SF 1 "register_operand" " 0,0")
3948 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3951 movlhps\t{%2, %0|%0, %2}
3952 movhps\t{%2, %0|%0, %2}"
3953 [(set_attr "type" "ssemov")
3954 (set_attr "mode" "V4SF,V2SF")])
;; Vector-initialization expander for every mode in SSEMODE.  All work is
;; delegated to ix86_expand_vector_init, called with false as its first
;; argument, the destination register, and the untyped operand 1.
3956 (define_expand "vec_init<mode>"
3957 [(match_operand:SSEMODE 0 "register_operand" "")
3958 (match_operand 1 "" "")]
3961 ix86_expand_vector_init (false, operands[0], operands[1]);
;; AVX insn merging a duplicated scalar (operand 2) into vector operand 1
;; for the SSEMODE4S modes.
;; Alternatives, as far as the visible templates show:
;;   0: x source, operand 1 "C"   -> vinsertps with immediate 0xe
;;   1: memory source, "C"        -> vmov<ssescalarmodesuffix> load
;;   2: *r source, "C"            -> vmovd
;;   3: x source, x operand 1     -> three-operand vmovss
;;   4: *rm source, x operand 1   -> vpinsrd with immediate 0
;;   5: memory destination with operand 1 tied to it; its type and mode
;;      attributes are "*" and its template line is not visible here.
3965 (define_insn "*vec_set<mode>_0_avx"
3966 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3967 (vec_merge:SSEMODE4S
3968 (vec_duplicate:SSEMODE4S
3969 (match_operand:<ssescalarmode> 2
3970 "general_operand" " x,m,*r,x,*rm,x*rfF"))
3971 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0")
3975 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
3976 vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3977 vmovd\t{%2, %0|%0, %2}
3978 vmovss\t{%2, %1, %0|%0, %1, %2}
3979 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3981 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
3982 (set_attr "prefix_extra" "*,*,*,*,1,*")
3983 (set_attr "length_immediate" "*,*,*,*,1,*")
3984 (set_attr "prefix" "vex")
3985 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; Two-operand counterpart of the AVX pattern for the SSEMODE4S modes;
;; operand 1 is tied to the destination wherever it is not "C".
;; Alternatives, as far as the visible templates show:
;;   0: x source, "C"       -> insertps with immediate 0xe
;;   1: memory source, "C"  -> mov<ssescalarmodesuffix> load
;;   2: *r source, "C"      -> movd
;;   3: x source, tied      -> movss
;;   4: *rm source, tied    -> pinsrd with immediate 0
;;   5: memory destination; type and mode attributes are "*" and its
;;      template line is not visible here.
;; NOTE(review): alternative 5 accepts "*rfF" for operand 2, whereas the
;; AVX and SSE2 variants accept "x*rfF" -- confirm the omission of "x" is
;; intentional.
3987 (define_insn "*vec_set<mode>_0_sse4_1"
3988 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3989 (vec_merge:SSEMODE4S
3990 (vec_duplicate:SSEMODE4S
3991 (match_operand:<ssescalarmode> 2
3992 "general_operand" " x,m,*r,x,*rm,*rfF"))
3993 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0, 0,0")
3997 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
3998 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3999 movd\t{%2, %0|%0, %2}
4000 movss\t{%2, %0|%0, %2}
4001 pinsrd\t{$0, %2, %0|%0, %2, 0}
4003 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
4004 (set_attr "prefix_extra" "*,*,*,*,1,*")
4005 (set_attr "length_immediate" "*,*,*,*,1,*")
4006 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
;; Four-alternative variant for the SSEMODE4S modes with no insertps or
;; pinsrd alternative.
;; Alternatives, as far as the visible templates show:
;;   0: memory source, "C" -> mov<ssescalarmodesuffix> load
;;   1: *r source, "C"     -> movd
;;   2: x source, tied     -> movss
;;   3: memory destination; mode attribute is "*" and its template line
;;      is not visible here.
4008 (define_insn "*vec_set<mode>_0_sse2"
4009 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x, x,x,m")
4010 (vec_merge:SSEMODE4S
4011 (vec_duplicate:SSEMODE4S
4012 (match_operand:<ssescalarmode> 2
4013 "general_operand" " m,*r,x,x*rfF"))
4014 (match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0")
4018 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4019 movd\t{%2, %0|%0, %2}
4020 movss\t{%2, %0|%0, %2}
4022 [(set_attr "type" "ssemov")
4023 (set_attr "mode" "<ssescalarmode>,SI,SF,*")])
;; Three-alternative variant for the SSEMODE4S modes.
;; Alternatives, as far as the visible templates show:
;;   0: memory source, "C" -> movss load
;;   1: x source, tied     -> movss
;;   2: memory destination; mode attribute is "*" and its template line
;;      is not visible here.
4025 (define_insn "vec_set<mode>_0"
4026 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x,m")
4027 (vec_merge:SSEMODE4S
4028 (vec_duplicate:SSEMODE4S
4029 (match_operand:<ssescalarmode> 2
4030 "general_operand" " m,x,x*rfF"))
4031 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,0,0")
4035 movss\t{%2, %0|%0, %2}
4036 movss\t{%2, %0|%0, %2}
4038 [(set_attr "type" "ssemov")
4039 (set_attr "mode" "SF,SF,*")])
4041 ;; A subset is vec_setv4sf.
;; AVX insertion of SF operand 2 (register or memory) into V4SF operand 1.
;; Operand 3 is a power of two in 1..8 (const_pow2_1_to_8_operand); the
;; output code replaces it with log2(operand 3) << 4 before printing it as
;; the vinsertps immediate.
4042 (define_insn "*vec_setv4sf_avx"
4043 [(set (match_operand:V4SF 0 "register_operand" "=x")
4046 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4047 (match_operand:V4SF 1 "register_operand" "x")
4048 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4051 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4052 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4054 [(set_attr "type" "sselog")
4055 (set_attr "prefix_extra" "1")
4056 (set_attr "length_immediate" "1")
4057 (set_attr "prefix" "vex")
4058 (set_attr "mode" "V4SF")])
;; Two-operand insertion of SF operand 2 (register or memory) into V4SF
;; operand 1, which is tied to the destination.  Operand 3 is a power of
;; two in 1..8; the output code replaces it with log2(operand 3) << 4
;; before printing it as the insertps immediate.
4060 (define_insn "*vec_setv4sf_sse4_1"
4061 [(set (match_operand:V4SF 0 "register_operand" "=x")
4064 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4065 (match_operand:V4SF 1 "register_operand" "0")
4066 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4069 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4070 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4072 [(set_attr "type" "sselog")
4073 (set_attr "prefix_data16" "1")
4074 (set_attr "prefix_extra" "1")
4075 (set_attr "length_immediate" "1")
4076 (set_attr "mode" "V4SF")])
;; vinsertps expressed as an unspec over V4SF operand 2 (register or
;; memory), V4SF operand 1 (register) and an immediate operand 3 in
;; 0..255, which is printed unchanged.
4078 (define_insn "*avx_insertps"
4079 [(set (match_operand:V4SF 0 "register_operand" "=x")
4080 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
4081 (match_operand:V4SF 1 "register_operand" "x")
4082 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4085 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4086 [(set_attr "type" "sselog")
4087 (set_attr "prefix" "vex")
4088 (set_attr "prefix_extra" "1")
4089 (set_attr "length_immediate" "1")
4090 (set_attr "mode" "V4SF")])
;; insertps expressed as an unspec over V4SF operand 2, V4SF operand 1
;; (tied to the destination) and an immediate operand 3 in 0..255, which is
;; printed unchanged.  Unlike the AVX variant, operand 2 is restricted to a
;; register here.
4092 (define_insn "sse4_1_insertps"
4093 [(set (match_operand:V4SF 0 "register_operand" "=x")
4094 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
4095 (match_operand:V4SF 1 "register_operand" "0")
4096 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4099 "insertps\t{%3, %2, %0|%0, %2, %3}";
4100 [(set_attr "type" "sselog")
4101 (set_attr "prefix_data16" "1")
4102 (set_attr "prefix_extra" "1")
4103 (set_attr "length_immediate" "1")
4104 (set_attr "mode" "V4SF")])
;; Pattern matching a vec_merge of a duplicated non-memory scalar
;; (operand 1) into a memory destination (operand 0); active only when
;; TARGET_SSE holds and reload has completed.  The replacement emits a move
;; whose destination is operand 0 re-addressed in <ssescalarmode> at
;; offset 0.
4107 [(set (match_operand:SSEMODE4S 0 "memory_operand" "")
4108 (vec_merge:SSEMODE4S
4109 (vec_duplicate:SSEMODE4S
4110 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
4113 "TARGET_SSE && reload_completed"
4116 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; Element-set expander for every mode in SSEMODE.  All work is delegated
;; to ix86_expand_vector_set, called with false, the vector register, the
;; scalar register, and the integer value of constant operand 2.
4121 (define_expand "vec_set<mode>"
4122 [(match_operand:SSEMODE 0 "register_operand" "")
4123 (match_operand:<ssescalarmode> 1 "register_operand" "")
4124 (match_operand 2 "const_int_operand" "")]
4127 ix86_expand_vector_set (false, operands[0], operands[1],
4128 INTVAL (operands[2]));
;; Extracts element 0 of V4SF operand 1 into SF operand 0; the condition
;; rejects memory-to-memory.  After reload the insn is split into a plain
;; move: operand 1 is re-expressed in SFmode, either as a fresh REG with
;; the same register number or through gen_lowpart (the test choosing
;; between the two is not visible in this excerpt).
4132 (define_insn_and_split "*vec_extractv4sf_0"
4133 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4135 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4136 (parallel [(const_int 0)])))]
4137 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4139 "&& reload_completed"
4142 rtx op1 = operands[1];
4144 op1 = gen_rtx_REG (SFmode, REGNO (op1));
4146 op1 = gen_lowpart (SFmode, op1);
4147 emit_move_insn (operands[0], op1);
;; Half-vector extraction for every AVX256MODE mode.  Operand 2 is a
;; constant 0 or 1; the switch on its value emits either
;; gen_vec_extract_lo_<mode> or gen_vec_extract_hi_<mode>.
4151 (define_expand "avx_vextractf128<mode>"
4152 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
4153 (match_operand:AVX256MODE 1 "register_operand" "")
4154 (match_operand:SI 2 "const_0_to_1_operand" "")]
4157 switch (INTVAL (operands[2]))
4160 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
4163 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
;; Low half (elements 0 and 1) of an AVX256MODE4P vector.  After reload
;; the insn is split into a plain move whose source is operand 1
;; re-expressed in <avxhalfvecmode>, via gen_rtx_REG or gen_lowpart.
4171 (define_insn_and_split "vec_extract_lo_<mode>"
4172 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4173 (vec_select:<avxhalfvecmode>
4174 (match_operand:AVX256MODE4P 1 "nonimmediate_operand" "xm,x")
4175 (parallel [(const_int 0) (const_int 1)])))]
4178 "&& reload_completed"
4181 rtx op1 = operands[1];
4183 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
4185 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
4186 emit_move_insn (operands[0], op1);
;; High half (elements 2 and 3) of an AVX256MODE4P register, emitted as
;; vextractf128 with immediate 0x1.  The destination is a register or
;; memory; the "memory" attribute is "none" or "store" accordingly.
4190 (define_insn "vec_extract_hi_<mode>"
4191 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4192 (vec_select:<avxhalfvecmode>
4193 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4194 (parallel [(const_int 2) (const_int 3)])))]
4196 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4197 [(set_attr "type" "sselog")
4198 (set_attr "prefix_extra" "1")
4199 (set_attr "length_immediate" "1")
4200 (set_attr "memory" "none,store")
4201 (set_attr "prefix" "vex")
4202 (set_attr "mode" "V8SF")])
;; Low half (elements 0..3) of an AVX256MODE8P vector.  After reload the
;; insn is split into a plain move whose source is operand 1 re-expressed
;; in <avxhalfvecmode>, via gen_rtx_REG or gen_lowpart.
4204 (define_insn_and_split "vec_extract_lo_<mode>"
4205 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4206 (vec_select:<avxhalfvecmode>
4207 (match_operand:AVX256MODE8P 1 "nonimmediate_operand" "xm,x")
4208 (parallel [(const_int 0) (const_int 1)
4209 (const_int 2) (const_int 3)])))]
4212 "&& reload_completed"
4215 rtx op1 = operands[1];
4217 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
4219 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
4220 emit_move_insn (operands[0], op1);
;; High half (elements 4..7) of an AVX256MODE8P register, emitted as
;; vextractf128 with immediate 0x1 into a register or memory destination.
4224 (define_insn "vec_extract_hi_<mode>"
4225 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4226 (vec_select:<avxhalfvecmode>
4227 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4228 (parallel [(const_int 4) (const_int 5)
4229 (const_int 6) (const_int 7)])))]
4231 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4232 [(set_attr "type" "sselog")
4233 (set_attr "prefix_extra" "1")
4234 (set_attr "length_immediate" "1")
4235 (set_attr "memory" "none,store")
4236 (set_attr "prefix" "vex")
4237 (set_attr "mode" "V8SF")])
;; Low half (elements 0..7) of a V16HI vector as V8HI.  After reload the
;; insn is split into a plain move whose source is operand 1 re-expressed
;; in V8HImode, via gen_rtx_REG or gen_lowpart.
4239 (define_insn_and_split "vec_extract_lo_v16hi"
4240 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4242 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4243 (parallel [(const_int 0) (const_int 1)
4244 (const_int 2) (const_int 3)
4245 (const_int 4) (const_int 5)
4246 (const_int 6) (const_int 7)])))]
4249 "&& reload_completed"
4252 rtx op1 = operands[1];
4254 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4256 op1 = gen_lowpart (V8HImode, op1);
4257 emit_move_insn (operands[0], op1);
;; High half (elements 8..15) of a V16HI register as V8HI, emitted as
;; vextractf128 with immediate 0x1 into a register or memory destination.
4261 (define_insn "vec_extract_hi_v16hi"
4262 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4264 (match_operand:V16HI 1 "register_operand" "x,x")
4265 (parallel [(const_int 8) (const_int 9)
4266 (const_int 10) (const_int 11)
4267 (const_int 12) (const_int 13)
4268 (const_int 14) (const_int 15)])))]
4270 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4271 [(set_attr "type" "sselog")
4272 (set_attr "prefix_extra" "1")
4273 (set_attr "length_immediate" "1")
4274 (set_attr "memory" "none,store")
4275 (set_attr "prefix" "vex")
4276 (set_attr "mode" "V8SF")])
;; Low half (elements 0..15) of a V32QI vector as V16QI.  After reload the
;; insn is split into a plain move whose source is operand 1 re-expressed
;; in V16QImode, via gen_rtx_REG or gen_lowpart.
4278 (define_insn_and_split "vec_extract_lo_v32qi"
4279 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4281 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4282 (parallel [(const_int 0) (const_int 1)
4283 (const_int 2) (const_int 3)
4284 (const_int 4) (const_int 5)
4285 (const_int 6) (const_int 7)
4286 (const_int 8) (const_int 9)
4287 (const_int 10) (const_int 11)
4288 (const_int 12) (const_int 13)
4289 (const_int 14) (const_int 15)])))]
4292 "&& reload_completed"
4295 rtx op1 = operands[1];
4297 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4299 op1 = gen_lowpart (V16QImode, op1);
4300 emit_move_insn (operands[0], op1);
;; High half (elements 16..31) of a V32QI register as V16QI, emitted as
;; vextractf128 with immediate 0x1 into a register or memory destination.
4304 (define_insn "vec_extract_hi_v32qi"
4305 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4307 (match_operand:V32QI 1 "register_operand" "x,x")
4308 (parallel [(const_int 16) (const_int 17)
4309 (const_int 18) (const_int 19)
4310 (const_int 20) (const_int 21)
4311 (const_int 22) (const_int 23)
4312 (const_int 24) (const_int 25)
4313 (const_int 26) (const_int 27)
4314 (const_int 28) (const_int 29)
4315 (const_int 30) (const_int 31)])))]
4317 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4318 [(set_attr "type" "sselog")
4319 (set_attr "prefix_extra" "1")
4320 (set_attr "length_immediate" "1")
4321 (set_attr "memory" "none,store")
4322 (set_attr "prefix" "vex")
4323 (set_attr "mode" "V8SF")])
;; EXTRACTPS: one SF element of a V4SF register into an "rm" destination.
;; Operand 2 is the element index (const_0_to_3_operand) and is printed
;; directly as the immediate.  The "%v" template prefix together with
;; prefix "maybe_vex" covers both the legacy and the VEX spelling.
4325 (define_insn "*sse4_1_extractps"
4326 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4328 (match_operand:V4SF 1 "register_operand" "x")
4329 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4331 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4332 [(set_attr "type" "sselog")
4333 (set_attr "prefix_data16" "1")
4334 (set_attr "prefix_extra" "1")
4335 (set_attr "length_immediate" "1")
4336 (set_attr "prefix" "maybe_vex")
4337 (set_attr "mode" "V4SF")])
;; Element extraction from a V4SF held in offsettable memory ("o").  The
;; split replaces it with an SF move from operands[1] re-addressed at byte
;; offset i*4, where i is the constant index in operand 2.
4339 (define_insn_and_split "*vec_extract_v4sf_mem"
4340 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4342 (match_operand:V4SF 1 "memory_operand" "o")
4343 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4349 int i = INTVAL (operands[2]);
4351 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander vec_extract<mode>.  Operand 0 is a <ssescalarmode> register,
;; operand 1 an SSEMODE vector register and operand 2 a const_int; they
;; are handed to ix86_expand_vector_extract with INTVAL (operands[2]).
4355 (define_expand "vec_extract<mode>"
4356 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4357 (match_operand:SSEMODE 1 "register_operand" "")
4358 (match_operand 2 "const_int_operand" "")]
4361 ix86_expand_vector_extract (false, operands[0], operands[1],
4362 INTVAL (operands[2]));
4366 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4368 ;; Parallel double-precision floating point element swizzling
4370 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4372 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Three-operand vunpckhpd on V4DF: operand 1 is a register, operand 2 a
;; register or memory.  The selection indices are 1, 5, 3, 7.
4373 (define_insn "avx_unpckhpd256"
4374 [(set (match_operand:V4DF 0 "register_operand" "=x")
4377 (match_operand:V4DF 1 "register_operand" "x")
4378 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4379 (parallel [(const_int 1) (const_int 5)
4380 (const_int 3) (const_int 7)])))]
4382 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4383 [(set_attr "type" "sselog")
4384 (set_attr "prefix" "vex")
4385 (set_attr "mode" "V4DF")])
;; Expander vec_interleave_highv2df.  Both inputs may be memory; if
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the
;; combination, operand 2 is forced into a V2DF register.
4387 (define_expand "vec_interleave_highv2df"
4388 [(set (match_operand:V2DF 0 "register_operand" "")
4391 (match_operand:V2DF 1 "nonimmediate_operand" "")
4392 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4393 (parallel [(const_int 1)
4397 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4398 operands[2] = force_reg (V2DFmode, operands[2]);
;; AVX forms of the V2DF high interleave, one template per alternative:
;;   0: vunpckhpd of two registers;
;;   1: vmovddup from the high half of memory operand 1 (%H1), with
;;      operand 2 matching operand 1;
;;   2: vmovlpd taking the high half of memory operand 1 and register
;;      operand 2;
;;   3: vmovhpd store of operand 1 to a memory destination that operand 2
;;      is tied to.
4401 (define_insn "*avx_interleave_highv2df"
4402 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4405 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,o,x")
4406 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,x,0"))
4407 (parallel [(const_int 1)
4409 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4411 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4412 vmovddup\t{%H1, %0|%0, %H1}
4413 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4414 vmovhpd\t{%1, %0|%0, %1}"
4415 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4416 (set_attr "prefix" "vex")
4417 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 forms of the V2DF high interleave (two-operand encodings, so one
;; input is always tied to the destination):
;;   0: unpckhpd with operand 1 tied;
;;   1: movddup from the high half of memory operand 1 (%H1);
;;   2: movlpd loading the high half of memory operand 1, operand 2 tied;
;;   3: movhpd store of operand 1 to the memory destination.
4419 (define_insn "*sse3_interleave_highv2df"
4420 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4423 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x")
4424 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0"))
4425 (parallel [(const_int 1)
4427 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4429 unpckhpd\t{%2, %0|%0, %2}
4430 movddup\t{%H1, %0|%0, %H1}
4431 movlpd\t{%H1, %0|%0, %H1}
4432 movhpd\t{%1, %0|%0, %1}"
4433 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4434 (set_attr "prefix_data16" "*,*,1,1")
4435 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 forms of the V2DF high interleave; same as the SSE3 pattern but
;; without a movddup alternative:
;;   0: unpckhpd with operand 1 tied to the destination;
;;   1: movlpd loading the high half of memory operand 1 (%H1);
;;   2: movhpd store of operand 1 to the memory destination.
4437 (define_insn "*sse2_interleave_highv2df"
4438 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4441 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4442 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4443 (parallel [(const_int 1)
4445 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4447 unpckhpd\t{%2, %0|%0, %2}
4448 movlpd\t{%H1, %0|%0, %H1}
4449 movhpd\t{%1, %0|%0, %1}"
4450 [(set_attr "type" "sselog,ssemov,ssemov")
4451 (set_attr "prefix_data16" "*,1,1")
4452 (set_attr "mode" "V2DF,V1DF,V1DF")])
4454 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander avx_movddup256: V4DF register destination, a single
;; nonimmediate V4DF input (operand 1), selection indices 0, 4, 2, 6.
4455 (define_expand "avx_movddup256"
4456 [(set (match_operand:V4DF 0 "register_operand" "")
4459 (match_operand:V4DF 1 "nonimmediate_operand" "")
4461 (parallel [(const_int 0) (const_int 4)
4462 (const_int 2) (const_int 6)])))]
;; Expander avx_unpcklpd256: V4DF register destination, register operand 1
;; and nonimmediate operand 2, selection indices 0, 4, 2, 6.
4465 (define_expand "avx_unpcklpd256"
4466 [(set (match_operand:V4DF 0 "register_operand" "")
4469 (match_operand:V4DF 1 "register_operand" "")
4470 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4471 (parallel [(const_int 0) (const_int 4)
4472 (const_int 2) (const_int 6)])))]
;; Matcher for the selection indices 0, 4, 2, 6 on V4DF, two alternatives:
;;   0: operand 2 matches operand 1 (constraint "1"), emitted as a
;;      one-input vmovddup;
;;   1: general three-operand vunpcklpd.
;; The visible part of the condition only lets operand 1 be memory when
;; it is rtx_equal_p to operand 2.
4475 (define_insn "*avx_unpcklpd256"
4476 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4479 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4480 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4481 (parallel [(const_int 0) (const_int 4)
4482 (const_int 2) (const_int 6)])))]
4484 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4486 vmovddup\t{%1, %0|%0, %1}
4487 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4488 [(set_attr "type" "sselog")
4489 (set_attr "prefix" "vex")
4490 (set_attr "mode" "V4DF")])
;; Expander vec_interleave_lowv2df.  Both inputs may be memory; if
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the
;; combination, operand 1 is forced into a V2DF register.
4492 (define_expand "vec_interleave_lowv2df"
4493 [(set (match_operand:V2DF 0 "register_operand" "")
4496 (match_operand:V2DF 1 "nonimmediate_operand" "")
4497 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4498 (parallel [(const_int 0)
4502 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4503 operands[1] = force_reg (V2DFmode, operands[1]);
;; AVX forms of the V2DF low interleave, one template per alternative:
;;   0: vunpcklpd of two registers;
;;   1: vmovddup from memory operand 1, with operand 2 matching operand 1;
;;   2: vmovhpd with register operand 1 and memory operand 2;
;;   3: vmovlpd store of operand 2 into the high half (%H0) of the memory
;;      destination that operand 1 is tied to.
4506 (define_insn "*avx_interleave_lowv2df"
4507 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4510 (match_operand:V2DF 1 "nonimmediate_operand" " x,m,x,0")
4511 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4512 (parallel [(const_int 0)
4514 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4516 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4517 vmovddup\t{%1, %0|%0, %1}
4518 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4519 vmovlpd\t{%2, %H0|%H0, %2}"
4520 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4521 (set_attr "prefix" "vex")
4522 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 forms of the V2DF low interleave:
;;   0: unpcklpd with operand 1 tied to the destination;
;;   1: movddup from memory operand 1, operand 2 matching operand 1;
;;   2: movhpd loading memory operand 2, operand 1 tied;
;;   3: movlpd store of operand 2 into the high half (%H0) of the memory
;;      destination.
4524 (define_insn "*sse3_interleave_lowv2df"
4525 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4528 (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0")
4529 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4530 (parallel [(const_int 0)
4532 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4534 unpcklpd\t{%2, %0|%0, %2}
4535 movddup\t{%1, %0|%0, %1}
4536 movhpd\t{%2, %0|%0, %2}
4537 movlpd\t{%2, %H0|%H0, %2}"
4538 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4539 (set_attr "prefix_data16" "*,*,1,1")
4540 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE2 forms of the V2DF low interleave; operand 1 is tied to the
;; destination in every alternative:
;;   0: unpcklpd with a register operand 2;
;;   1: movhpd loading memory operand 2;
;;   2: movlpd store of operand 2 into the high half (%H0) of the memory
;;      destination.
4542 (define_insn "*sse2_interleave_lowv2df"
4543 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4546 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4547 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4548 (parallel [(const_int 0)
4550 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4552 unpcklpd\t{%2, %0|%0, %2}
4553 movhpd\t{%2, %0|%0, %2}
4554 movlpd\t{%2, %H0|%H0, %2}"
4555 [(set_attr "type" "sselog,ssemov,ssemov")
4556 (set_attr "prefix_data16" "*,1,1")
4557 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Post-reload rewrite (TARGET_SSE3 && reload_completed) for a V2DF memory
;; destination fed from register operand 1: the register is viewed as a DF
;; value and that same value is stored twice, at byte offsets 0 and 8 of
;; the destination.
4560 [(set (match_operand:V2DF 0 "memory_operand" "")
4563 (match_operand:V2DF 1 "register_operand" "")
4565 (parallel [(const_int 0)
4567 "TARGET_SSE3 && reload_completed"
4570 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4571 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4572 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Rewrite for a two-index selection from V2DF memory operand 1 where the
;; second index equals the first plus 2 (TARGET_SSE3).  The result is a
;; vec_duplicate:V2DF of operand 1 re-addressed in DFmode at byte offset
;; INTVAL (operands[2]) * 8.
4577 [(set (match_operand:V2DF 0 "register_operand" "")
4580 (match_operand:V2DF 1 "memory_operand" "")
4582 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4583 (match_operand:SI 3 "const_int_operand" "")])))]
4584 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4585 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4587 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander avx_shufpd256.  Operand 3 is the shuffle mask; its bits are
;; decoded into explicit element indices for gen_avx_shufpd256_1.  In the
;; visible arguments bit 1 picks 5 or 4, bit 2 picks 3 or 2, and bit 3
;; picks 7 or 6.
4590 (define_expand "avx_shufpd256"
4591 [(match_operand:V4DF 0 "register_operand" "")
4592 (match_operand:V4DF 1 "register_operand" "")
4593 (match_operand:V4DF 2 "nonimmediate_operand" "")
4594 (match_operand:SI 3 "const_int_operand" "")]
4597 int mask = INTVAL (operands[3]);
4598 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4600 GEN_INT (mask & 2 ? 5 : 4),
4601 GEN_INT (mask & 4 ? 3 : 2),
4602 GEN_INT (mask & 8 ? 7 : 6)));
;; vshufpd on V4DF with the four selectors as separate operands 3..6,
;; restricted to {0,1}, {4,5}, {2,3} and {6,7} respectively.  The output
;; code folds them back into the 4-bit immediate: operand 3 supplies bit
;; 0, and operands 4..6 supply bits 1..3 after subtracting their range
;; base.  The result overwrites operands[3] for printing as %3.
4606 (define_insn "avx_shufpd256_1"
4607 [(set (match_operand:V4DF 0 "register_operand" "=x")
4610 (match_operand:V4DF 1 "register_operand" "x")
4611 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4612 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4613 (match_operand 4 "const_4_to_5_operand" "")
4614 (match_operand 5 "const_2_to_3_operand" "")
4615 (match_operand 6 "const_6_to_7_operand" "")])))]
4619 mask = INTVAL (operands[3]);
4620 mask |= (INTVAL (operands[4]) - 4) << 1;
4621 mask |= (INTVAL (operands[5]) - 2) << 2;
4622 mask |= (INTVAL (operands[6]) - 6) << 3;
4623 operands[3] = GEN_INT (mask);
4625 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4627 [(set_attr "type" "sselog")
4628 (set_attr "length_immediate" "1")
4629 (set_attr "prefix" "vex")
4630 (set_attr "mode" "V4DF")])
;; Expander sse2_shufpd.  Operand 3 is the shuffle mask; it is decoded
;; into explicit element indices for gen_sse2_shufpd_v2df.  In the visible
;; argument bit 1 picks index 3 or 2.
4632 (define_expand "sse2_shufpd"
4633 [(match_operand:V2DF 0 "register_operand" "")
4634 (match_operand:V2DF 1 "register_operand" "")
4635 (match_operand:V2DF 2 "nonimmediate_operand" "")
4636 (match_operand:SI 3 "const_int_operand" "")]
4639 int mask = INTVAL (operands[3]);
4640 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4642 GEN_INT (mask & 2 ? 3 : 2)));
;; Expander vec_extract_even<mode> for SSEMODE_EO: three vector registers
;; (destination and two inputs) are passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
4646 (define_expand "vec_extract_even<mode>"
4647 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4648 (match_operand:SSEMODE_EO 1 "register_operand" "")
4649 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4652 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; Expander vec_extract_odd<mode> for SSEMODE_EO: three vector registers
;; (destination and two inputs) are passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 1.
4656 (define_expand "vec_extract_odd<mode>"
4657 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4658 (match_operand:SSEMODE_EO 1 "register_operand" "")
4659 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4662 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4666 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX three-operand vpunpckhqdq on V2DI: register operand 1, register or
;; memory operand 2.
4667 (define_insn "*avx_interleave_highv2di"
4668 [(set (match_operand:V2DI 0 "register_operand" "=x")
4671 (match_operand:V2DI 1 "register_operand" "x")
4672 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4673 (parallel [(const_int 1)
4676 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4677 [(set_attr "type" "sselog")
4678 (set_attr "prefix" "vex")
4679 (set_attr "mode" "TI")])
;; Two-operand punpckhqdq on V2DI: operand 1 is tied to the destination
;; (constraint "0"), operand 2 is a register or memory.
4681 (define_insn "vec_interleave_highv2di"
4682 [(set (match_operand:V2DI 0 "register_operand" "=x")
4685 (match_operand:V2DI 1 "register_operand" "0")
4686 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4687 (parallel [(const_int 1)
4690 "punpckhqdq\t{%2, %0|%0, %2}"
4691 [(set_attr "type" "sselog")
4692 (set_attr "prefix_data16" "1")
4693 (set_attr "mode" "TI")])
;; AVX three-operand vpunpcklqdq on V2DI: register operand 1, register or
;; memory operand 2.
4695 (define_insn "*avx_interleave_lowv2di"
4696 [(set (match_operand:V2DI 0 "register_operand" "=x")
4699 (match_operand:V2DI 1 "register_operand" "x")
4700 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4701 (parallel [(const_int 0)
4704 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4705 [(set_attr "type" "sselog")
4706 (set_attr "prefix" "vex")
4707 (set_attr "mode" "TI")])
;; Two-operand punpcklqdq on V2DI: operand 1 is tied to the destination
;; (constraint "0"), operand 2 is a register or memory.
4709 (define_insn "vec_interleave_lowv2di"
4710 [(set (match_operand:V2DI 0 "register_operand" "=x")
4713 (match_operand:V2DI 1 "register_operand" "0")
4714 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4715 (parallel [(const_int 0)
4718 "punpcklqdq\t{%2, %0|%0, %2}"
4719 [(set_attr "type" "sselog")
4720 (set_attr "prefix_data16" "1")
4721 (set_attr "mode" "TI")])
;; AVX vshufpd for the SSEMODE2D modes, written as a vec_select over the
;; concatenation of operands 1 and 2.  Operand 3 (0 or 1) selects from the
;; first input and operand 4 (2 or 3) from the second.  The output code
;; rebuilds the 2-bit immediate as operand 3 | ((operand 4 - 2) << 1) and
;; stores it in operands[3] for printing.
4723 (define_insn "*avx_shufpd_<mode>"
4724 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4725 (vec_select:SSEMODE2D
4726 (vec_concat:<ssedoublesizemode>
4727 (match_operand:SSEMODE2D 1 "register_operand" "x")
4728 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4729 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4730 (match_operand 4 "const_2_to_3_operand" "")])))]
4734 mask = INTVAL (operands[3]);
4735 mask |= (INTVAL (operands[4]) - 2) << 1;
4736 operands[3] = GEN_INT (mask);
4738 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4740 [(set_attr "type" "sselog")
4741 (set_attr "length_immediate" "1")
4742 (set_attr "prefix" "vex")
4743 (set_attr "mode" "V2DF")])
;; Two-operand shufpd for the SSEMODE2D modes; operand 1 is tied to the
;; destination.  Operand 3 (0 or 1) selects from the first input and
;; operand 4 (2 or 3) from the second.  The output code rebuilds the 2-bit
;; immediate as operand 3 | ((operand 4 - 2) << 1) and stores it in
;; operands[3] for printing.
4745 (define_insn "sse2_shufpd_<mode>"
4746 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4747 (vec_select:SSEMODE2D
4748 (vec_concat:<ssedoublesizemode>
4749 (match_operand:SSEMODE2D 1 "register_operand" "0")
4750 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4751 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4752 (match_operand 4 "const_2_to_3_operand" "")])))]
4756 mask = INTVAL (operands[3]);
4757 mask |= (INTVAL (operands[4]) - 2) << 1;
4758 operands[3] = GEN_INT (mask);
4760 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4762 [(set_attr "type" "sselog")
4763 (set_attr "length_immediate" "1")
4764 (set_attr "mode" "V2DF")])
4766 ;; Avoid combining registers from different units in a single alternative,
4767 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX extraction of element 1 of a V2DF into a DF destination; memory to
;; memory is excluded by the condition.  Alternative 0 is a vmovhpd store
;; to memory and alternative 1 a register-to-register vunpckhpd of the
;; source with itself.  The other three alternatives read an offsettable
;; memory source ("o") into an SSE, x87 or general register.
4768 (define_insn "*avx_storehpd"
4769 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4771 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4772 (parallel [(const_int 1)])))]
4773 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4775 vmovhpd\t{%1, %0|%0, %1}
4776 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4780 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4781 (set_attr "prefix" "vex")
4782 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 extraction of element 1 of a V2DF into a DF destination; memory to
;; memory is excluded by the condition.  Alternative 0 is a movhpd store
;; to memory.  Alternative 1 keeps the value in the same register
;; (operand 1 tied to operand 0).  The other three alternatives read an
;; offsettable memory source ("o").
4784 (define_insn "sse2_storehpd"
4785 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4787 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4788 (parallel [(const_int 1)])))]
4789 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4791 movhpd\t{%1, %0|%0, %1}
4796 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4797 (set_attr "prefix_data16" "1,*,*,*,*")
4798 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite (TARGET_SSE2 && reload_completed): extracting
;; element 1 from a V2DF in memory becomes a plain DF load from byte
;; offset 8 of that memory.
4801 [(set (match_operand:DF 0 "register_operand" "")
4803 (match_operand:V2DF 1 "memory_operand" "")
4804 (parallel [(const_int 1)])))]
4805 "TARGET_SSE2 && reload_completed"
4806 [(set (match_dup 0) (match_dup 1))]
4807 "operands[1] = adjust_address (operands[1], DFmode, 8);")
4809 ;; Avoid combining registers from different units in a single alternative,
4810 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extraction of element 0 of a V2DF into a DF destination; memory to
;; memory is excluded by the condition.  Alternative 0 is a (v)movlpd
;; store to memory; the "%v" prefix and prefix "maybe_vex" cover both
;; encodings.  The remaining alternatives move from a register or from
;; memory into an SSE, x87 or general register.
4811 (define_insn "sse2_storelpd"
4812 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4814 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4815 (parallel [(const_int 0)])))]
4816 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4818 %vmovlpd\t{%1, %0|%0, %1}
4823 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4824 (set_attr "prefix_data16" "1,*,*,*,*")
4825 (set_attr "prefix" "maybe_vex")
4826 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload rewrite (TARGET_SSE2 && reload_completed): extracting
;; element 0 of a V2DF into a DF register becomes a plain move whose
;; source is operand 1 re-expressed in DFmode, via gen_rtx_REG or
;; gen_lowpart.
4829 [(set (match_operand:DF 0 "register_operand" "")
4831 (match_operand:V2DF 1 "nonimmediate_operand" "")
4832 (parallel [(const_int 0)])))]
4833 "TARGET_SSE2 && reload_completed"
4836 rtx op1 = operands[1];
4838 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4840 op1 = gen_lowpart (DFmode, op1);
4841 emit_move_insn (operands[0], op1);
;; Expander sse2_loadhpd_exp: element 0 of V2DF operand 1 paired with DF
;; operand 2.  All three operands may be memory; the preparation statement
;; passes them through ix86_fixup_binary_operands (UNKNOWN, V2DFmode, ..).
4845 (define_expand "sse2_loadhpd_exp"
4846 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4849 (match_operand:V2DF 1 "nonimmediate_operand" "")
4850 (parallel [(const_int 0)]))
4851 (match_operand:DF 2 "nonimmediate_operand" "")))]
4853 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4855 ;; Avoid combining registers from different units in a single alternative,
4856 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX insn pairing element 0 of V2DF operand 1 with DF operand 2.
;; Alternative 0 is vmovhpd with a memory operand 2 and alternative 1 is
;; vunpcklpd of two registers.  The other three alternatives have an
;; offsettable memory destination with operand 1 tied to it and operand 2
;; in an SSE, x87 or general register.
4857 (define_insn "*avx_loadhpd"
4858 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4861 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4862 (parallel [(const_int 0)]))
4863 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4864 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4866 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4867 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4871 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4872 (set_attr "prefix" "vex")
4873 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
;; SSE2 insn pairing element 0 of V2DF operand 1 with DF operand 2.
;; Alternative 0 is movhpd from memory, alternative 1 unpcklpd and
;; alternative 2 shufpd; the other three have an offsettable memory
;; destination with operand 1 tied to it.
;; NOTE(review): in the shufpd alternative operand 2 is tied to the
;; destination and operand 1 is the source.  Per the SHUFPD definition,
;; immediate 1 would put the destination's own high quadword in the low
;; lane and operand 1's low quadword in the high lane, which looks like
;; the reverse of what the other alternatives produce.  Please confirm
;; whether this alternative is correct.
4875 (define_insn "sse2_loadhpd"
4876 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o,o,o")
4879 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0,0,0")
4880 (parallel [(const_int 0)]))
4881 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x,*f,r")))]
4882 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4884 movhpd\t{%2, %0|%0, %2}
4885 unpcklpd\t{%2, %0|%0, %2}
4886 shufpd\t{$1, %1, %0|%0, %1, 1}
4890 [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov")
4891 (set_attr "prefix_data16" "1,*,*,*,*,*")
4892 (set_attr "length_immediate" "*,*,1,*,*,*")
4893 (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite (TARGET_SSE2 && reload_completed): when a V2DF in
;; memory keeps its own element 0 and takes DF register operand 1 as the
;; other element, only the high half changes.  It becomes a plain DF store
;; to byte offset 8 of the destination.
4896 [(set (match_operand:V2DF 0 "memory_operand" "")
4898 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4899 (match_operand:DF 1 "register_operand" "")))]
4900 "TARGET_SSE2 && reload_completed"
4901 [(set (match_dup 0) (match_dup 1))]
4902 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander sse2_loadlpd_exp: DF operand 2 paired with element 1 of V2DF
;; operand 1.  All three operands may be memory; the preparation statement
;; passes them through ix86_fixup_binary_operands (UNKNOWN, V2DFmode, ..).
4904 (define_expand "sse2_loadlpd_exp"
4905 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4907 (match_operand:DF 2 "nonimmediate_operand" "")
4909 (match_operand:V2DF 1 "nonimmediate_operand" "")
4910 (parallel [(const_int 1)]))))]
4912 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4914 ;; Avoid combining registers from different units in a single alternative,
4915 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX insn pairing DF operand 2 with element 1 of V2DF operand 1.
;; Register-destination alternatives:
;;   0: vmovsd load of operand 2 when operand 1 satisfies "C";
;;   1: vmovlpd with memory operand 2 and register operand 1;
;;   2: vmovsd of two registers;
;;   3: vmovhpd taking the high half (%H1) of memory operand 1.
;; The last three alternatives have a memory destination with operand 1
;; tied to it.
4916 (define_insn "*avx_loadlpd"
4917 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4919 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4921 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4922 (parallel [(const_int 1)]))))]
4923 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4925 vmovsd\t{%2, %0|%0, %2}
4926 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4927 vmovsd\t{%2, %1, %0|%0, %1, %2}
4928 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4932 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
4933 (set_attr "prefix" "vex")
4934 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
;; SSE2 insn pairing DF operand 2 (new low element) with element 1 of V2DF
;; operand 1 (kept high element).  Register-destination alternatives:
;;   0: movsd load of operand 2 when operand 1 satisfies "C";
;;   1: movlpd from memory operand 2, operand 1 tied to the destination;
;;   2: movsd from register operand 2, operand 1 tied;
;;   3: shufpd with operand 2 tied to the destination and operand 1 in
;;      another register;
;;   4: movhpd taking the high half (%H1) of memory operand 1, operand 2
;;      tied.
;; The last three alternatives have a memory destination with operand 1
;; tied to it.
;; In alternative 3 the shufpd source must be operand 1: immediate 2 keeps
;; the destination's low quadword (operand 2) and takes the high quadword
;; from the source.  Naming %2 there would read the destination itself
;; and never pick up operand 1's high element.
4936 (define_insn "sse2_loadlpd"
4937 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
4939 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
4941 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
4942 (parallel [(const_int 1)]))))]
4943 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4945 movsd\t{%2, %0|%0, %2}
4946 movlpd\t{%2, %0|%0, %2}
4947 movsd\t{%2, %0|%0, %2}
4948 shufpd\t{$2, %1, %0|%0, %1, 2}
4949 movhpd\t{%H1, %0|%0, %H1}
4953 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
4954 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
4955 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
4956 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
;; Post-reload rewrite (TARGET_SSE2 && reload_completed): when a V2DF in
;; memory takes DF register operand 1 as its first element and keeps its
;; own element 1, only the low half changes.  It becomes a plain DF store
;; to byte offset 0 of the destination.  Offset 8 would overwrite the
;; element that the pattern says is preserved.
4959 [(set (match_operand:V2DF 0 "memory_operand" "")
4961 (match_operand:DF 1 "register_operand" "")
4962 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4963 "TARGET_SSE2 && reload_completed"
4964 [(set (match_dup 0) (match_dup 1))]
4965 "operands[0] = adjust_address (operands[0], DFmode, 0);")
4967 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; SSE-without-SSE2 extraction of element 1 of a V2DF using single
;; precision moves:
;;   0: movhps store to memory;
;;   1: movhlps between registers;
;;   2: movlps load from the high half (%H1) of an offsettable memory
;;      source.
4969 (define_insn "*vec_extractv2df_1_sse"
4970 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4972 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4973 (parallel [(const_int 1)])))]
4974 "!TARGET_SSE2 && TARGET_SSE
4975 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4977 movhps\t{%1, %0|%0, %1}
4978 movhlps\t{%1, %0|%0, %1}
4979 movlps\t{%H1, %0|%0, %H1}"
4980 [(set_attr "type" "ssemov")
4981 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; SSE-without-SSE2 extraction of element 0 of a V2DF using single
;; precision moves:
;;   0: movlps store to memory;
;;   1: movaps between registers;
;;   2: movlps load from memory.
4983 (define_insn "*vec_extractv2df_0_sse"
4984 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4986 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4987 (parallel [(const_int 0)])))]
4988 "!TARGET_SSE2 && TARGET_SSE
4989 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4991 movlps\t{%1, %0|%0, %1}
4992 movaps\t{%1, %0|%0, %1}
4993 movlps\t{%1, %0|%0, %1}"
4994 [(set_attr "type" "ssemov")
4995 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX insn combining V2DF operands 2 and 1, one template per alternative:
;;   0: vmovsd of two registers;
;;   1: vmovlpd with memory operand 2;
;;   2: vmovlpd store of operand 2 to a memory destination that operand 1
;;      is tied to;
;;   3: vmovhps taking the high half (%H1) of memory operand 1;
;;   4: vmovhps store of operand 1 into the high half (%H0) of a memory
;;      destination that operand 2 is tied to.
4997 (define_insn "*avx_movsd"
4998 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
5000 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
5001 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
5005 vmovsd\t{%2, %1, %0|%0, %1, %2}
5006 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5007 vmovlpd\t{%2, %0|%0, %2}
5008 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
5009 vmovhps\t{%1, %H0|%H0, %1}"
5010 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
5011 (set_attr "prefix" "vex")
5012 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
;; SSE2 insn combining V2DF operands 2 and 1, one template per alternative:
;;   0: movsd from register operand 2, operand 1 tied to the destination;
;;   1: movlpd from memory operand 2, operand 1 tied;
;;   2: movlpd store of operand 2 to a memory destination that operand 1
;;      is tied to;
;;   3: shufpd with operand 2 tied to the destination and operand 1 in
;;      another register;
;;   4: movhps taking the high half (%H1) of memory operand 1, operand 2
;;      tied;
;;   5: movhps store of operand 1 into the high half (%H0) of the memory
;;      destination.
;; In alternative 3 the shufpd source must be operand 1: immediate 2 keeps
;; the destination's low quadword (operand 2) and takes the high quadword
;; from the source, matching what alternatives 4 and 5 do with operand 1.
;; Naming %2 there would read the destination itself.
5014 (define_insn "sse2_movsd"
5015 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
5017 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
5018 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
5022 movsd\t{%2, %0|%0, %2}
5023 movlpd\t{%2, %0|%0, %2}
5024 movlpd\t{%2, %0|%0, %2}
5025 shufpd\t{$2, %1, %0|%0, %1, 2}
5026 movhps\t{%H1, %0|%0, %H1}
5027 movhps\t{%1, %H0|%H0, %1}"
5028 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
5029 (set_attr "prefix_data16" "*,1,1,*,*,*")
5030 (set_attr "length_immediate" "*,*,*,1,*,*")
5031 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; (v)movddup from a DF register or memory operand into a V2DF register;
;; the "%v" prefix and prefix "maybe_vex" cover both encodings.
5033 (define_insn "*vec_dupv2df_sse3"
5034 [(set (match_operand:V2DF 0 "register_operand" "=x")
5036 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
5038 "%vmovddup\t{%1, %0|%0, %1}"
5039 [(set_attr "type" "sselog1")
5040 (set_attr "prefix" "maybe_vex")
5041 (set_attr "mode" "DF")])
;; vec_dupv2df: V2DF register destination with the DF register input tied
;; to it (constraint "0"); type sselog1, mode V2DF.
5043 (define_insn "vec_dupv2df"
5044 [(set (match_operand:V2DF 0 "register_operand" "=x")
5046 (match_operand:DF 1 "register_operand" "0")))]
5049 [(set_attr "type" "sselog1")
5050 (set_attr "mode" "V2DF")])
;; V2DF built from a single DF register-or-memory operand, emitted as
;; (v)movddup; the "%v" prefix and prefix "maybe_vex" cover both encodings.
5052 (define_insn "*vec_concatv2df_sse3"
5053 [(set (match_operand:V2DF 0 "register_operand" "=x")
5055 (match_operand:DF 1 "nonimmediate_operand" "xm")
5058 "%vmovddup\t{%1, %0|%0, %1}"
5059 [(set_attr "type" "sselog1")
5060 (set_attr "prefix" "maybe_vex")
5061 (set_attr "mode" "DF")])
;; AVX construction of a V2DF from DF operands 1 and 2:
;;   0: vunpcklpd of two registers;
;;   1: vmovhpd with memory operand 2;
;;   2: vmovsd load of memory operand 1 when operand 2 satisfies "C".
5063 (define_insn "*vec_concatv2df_avx"
5064 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
5066 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
5067 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
5070 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5071 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5072 vmovsd\t{%1, %0|%0, %1}"
5073 [(set_attr "type" "ssemov")
5074 (set_attr "prefix" "vex")
5075 (set_attr "mode" "DF,V1DF,DF")])
;; Non-AVX construction of a V2DF from DF operands 1 and 2.  The first
;; three alternatives use the "Y2" register class:
;;   0: unpcklpd with register operand 2;
;;   1: movhpd with memory operand 2;
;;   2: movsd load of memory operand 1 when operand 2 satisfies "C".
;; The last two use "x" registers and single-precision moves:
;;   3: movlhps with register operand 2;
;;   4: movhps with memory operand 2.
5077 (define_insn "*vec_concatv2df"
5078 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
5080 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
5081 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
5084 unpcklpd\t{%2, %0|%0, %2}
5085 movhpd\t{%2, %0|%0, %2}
5086 movsd\t{%1, %0|%0, %1}
5087 movlhps\t{%2, %0|%0, %2}
5088 movhps\t{%2, %0|%0, %2}"
5089 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
5090 (set_attr "prefix_data16" "*,1,*,*,*")
5091 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
5093 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5095 ;; Parallel integral arithmetic
5097 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander neg<mode>2 for the SSEMODEI modes.  The preparation statement
;; sets operands[2] to the all-zero constant of <MODE>mode forced into a
;; register.
5099 (define_expand "neg<mode>2"
5100 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5103 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
5105 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander <plusminus_insn><mode>3 for the SSEMODEI modes.  Both inputs
;; may be memory; the preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy for <CODE> in <MODE>mode.
5107 (define_expand "<plusminus_insn><mode>3"
5108 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5110 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5111 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5113 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand vp<plusminus_mnemonic><ssevecsize> for the SSEMODEI
;; modes; requires ix86_binary_operator_ok.  The <comm> attribute fills in
;; the leading modifier of operand 1's constraint.
5115 (define_insn "*avx_<plusminus_insn><mode>3"
5116 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5118 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
5119 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5120 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5121 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5122 [(set_attr "type" "sseiadd")
5123 (set_attr "prefix" "vex")
5124 (set_attr "mode" "TI")])
;; SSE2 two-operand p<plusminus_mnemonic><ssevecsize> for the SSEMODEI
;; modes; operand 1 is tied to the destination and
;; ix86_binary_operator_ok must accept the operands.
5126 (define_insn "*<plusminus_insn><mode>3"
5127 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5129 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
5130 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5131 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5132 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5133 [(set_attr "type" "sseiadd")
5134 (set_attr "prefix_data16" "1")
5135 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus codes on the SSEMODE12 modes (V16QI and
;; V8HI).  Both inputs may be memory; the preparation statement passes the
;; operands through ix86_fixup_binary_operands_no_copy.
5137 (define_expand "sse2_<plusminus_insn><mode>3"
5138 [(set (match_operand:SSEMODE12 0 "register_operand" "")
5139 (sat_plusminus:SSEMODE12
5140 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
5141 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
5143 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX three-operand form of the sat_plusminus codes on the SSEMODE12
;; modes, printed as vp<plusminus_mnemonic><ssevecsize>; requires
;; ix86_binary_operator_ok.
5145 (define_insn "*avx_<plusminus_insn><mode>3"
5146 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5147 (sat_plusminus:SSEMODE12
5148 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
5149 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5150 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5151 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5152 [(set_attr "type" "sseiadd")
5153 (set_attr "prefix" "vex")
5154 (set_attr "mode" "TI")])
;; SSE2 two-operand form of the sat_plusminus codes on the SSEMODE12
;; modes, printed as p<plusminus_mnemonic><ssevecsize>; operand 1 is tied
;; to the destination and ix86_binary_operator_ok must accept the
;; operands.
5156 (define_insn "*sse2_<plusminus_insn><mode>3"
5157 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5158 (sat_plusminus:SSEMODE12
5159 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
5160 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5161 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5162 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5163 [(set_attr "type" "sseiadd")
5164 (set_attr "prefix_data16" "1")
5165 (set_attr "mode" "TI")])
;; V16QI multiply, synthesized while pseudos can still be created.  Six
;; V16QI temporaries are allocated.  Each input is interleaved with itself
;; (high and low halves) so that every 16-bit word carries a source byte
;; in its low byte.  The two word pairs are multiplied with gen_mulv8hi3
;; on V8HImode lowparts.  ix86_expand_vec_extract_even_odd (.., 0) then
;; gathers the even bytes of the two products into operand 0.
5167 (define_insn_and_split "mulv16qi3"
5168 [(set (match_operand:V16QI 0 "register_operand" "")
5169 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
5170 (match_operand:V16QI 2 "register_operand" "")))]
5172 && can_create_pseudo_p ()"
5180 for (i = 0; i < 6; ++i)
5181 t[i] = gen_reg_rtx (V16QImode);
5183 /* Unpack data such that we've got a source byte in each low byte of
5184 each word. We don't care what goes into the high byte of each word.
5185 Rather than trying to get zero in there, most convenient is to let
5186 it be a copy of the low byte. */
5187 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
5188 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
5189 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
5190 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
5192 /* Multiply words. The end-of-line annotations here give a picture of what
5193 the output of that instruction looks like. Dot means don't care; the
5194 letters are the bytes of the result with A being the most significant. */
5195 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
5196 gen_lowpart (V8HImode, t[0]),
5197 gen_lowpart (V8HImode, t[1])));
5198 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
5199 gen_lowpart (V8HImode, t[2]),
5200 gen_lowpart (V8HImode, t[3])));
5202 /* Extract the even bytes and merge them back together. */
5203 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; Expander for the V8HI multiply.  Both sources are accepted as
;; nonimmediate operands; the preparation statement passes the operand
;; array to ix86_fixup_binary_operands_no_copy for MULT in V8HImode.
5207 (define_expand "mulv8hi3"
5208 [(set (match_operand:V8HI 0 "register_operand" "")
5209 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
5210 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5212 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX V8HI multiply, printed as vpmullw with three operands.  Operand 1
;; carries the "%" constraint modifier (marking it commutative with operand
;; 2); operand 2 may be a register or memory.
5214 (define_insn "*avx_mulv8hi3"
5215 [(set (match_operand:V8HI 0 "register_operand" "=x")
5216 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5217 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5218 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5219 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
5220 [(set_attr "type" "sseimul")
5221 (set_attr "prefix" "vex")
5222 (set_attr "mode" "TI")])
;; SSE2 V8HI multiply, printed as pmullw.  Operand 1 is tied to the
;; destination ("%0": commutative, same register as operand 0), so the
;; template prints only %0 and %2.
5224 (define_insn "*mulv8hi3"
5225 [(set (match_operand:V8HI 0 "register_operand" "=x")
5226 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5227 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5228 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5229 "pmullw\t{%2, %0|%0, %2}"
5230 [(set_attr "type" "sseimul")
5231 (set_attr "prefix_data16" "1")
5232 (set_attr "mode" "TI")])
;; Expander for the V8HI high-part multiply; the <s> in the name is filled
;; in by an iterator attribute.  Operands 1 and 2 are nonimmediate V8HI
;; values, and the preparation statement runs
;; ix86_fixup_binary_operands_no_copy for MULT in V8HImode.
5234 (define_expand "<s>mulv8hi3_highpart"
5235 [(set (match_operand:V8HI 0 "register_operand" "")
5240 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5242 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5245 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX V8HI high-part multiply, printed as vpmulh<u>w with three operands.
;; Operand 1 is marked commutative ("%x"); operand 2 may be a register or
;; memory.
5247 (define_insn "*avx_<s>mulv8hi3_highpart"
5248 [(set (match_operand:V8HI 0 "register_operand" "=x")
5253 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5255 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5257 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5258 "vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5259 [(set_attr "type" "sseimul")
5260 (set_attr "prefix" "vex")
5261 (set_attr "mode" "TI")])
;; SSE2 V8HI high-part multiply, printed as pmulh<u>w.  Operand 1 is tied
;; to the destination ("%0"), so only %0 and %2 appear in the template.
5263 (define_insn "*<s>mulv8hi3_highpart"
5264 [(set (match_operand:V8HI 0 "register_operand" "=x")
5269 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5271 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5273 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5274 "pmulh<u>w\t{%2, %0|%0, %2}"
5275 [(set_attr "type" "sseimul")
5276 (set_attr "prefix_data16" "1")
5277 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from elements 0 and 2 of two V4SI
;; inputs (the parallel selectors below pick indices 0 and 2 from each).
;; The preparation statement runs ix86_fixup_binary_operands_no_copy for
;; MULT in V4SImode.
5279 (define_expand "sse2_umulv2siv2di3"
5280 [(set (match_operand:V2DI 0 "register_operand" "")
5284 (match_operand:V4SI 1 "nonimmediate_operand" "")
5285 (parallel [(const_int 0) (const_int 2)])))
5288 (match_operand:V4SI 2 "nonimmediate_operand" "")
5289 (parallel [(const_int 0) (const_int 2)])))))]
5291 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX insn for the V4SI -> V2DI multiply of elements 0 and 2, printed as
;; vpmuludq with three operands.  Operand 1 is marked commutative ("%x");
;; operand 2 may be a register or memory.
5293 (define_insn "*avx_umulv2siv2di3"
5294 [(set (match_operand:V2DI 0 "register_operand" "=x")
5298 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5299 (parallel [(const_int 0) (const_int 2)])))
5302 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5303 (parallel [(const_int 0) (const_int 2)])))))]
5304 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5305 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5306 [(set_attr "type" "sseimul")
5307 (set_attr "prefix" "vex")
5308 (set_attr "mode" "TI")])
;; SSE2 insn for the V4SI -> V2DI multiply of elements 0 and 2, printed as
;; pmuludq.  Operand 1 is tied to the destination ("%0"); operand 2 may be
;; a register or memory.
5310 (define_insn "*sse2_umulv2siv2di3"
5311 [(set (match_operand:V2DI 0 "register_operand" "=x")
5315 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5316 (parallel [(const_int 0) (const_int 2)])))
5319 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5320 (parallel [(const_int 0) (const_int 2)])))))]
5321 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5322 "pmuludq\t{%2, %0|%0, %2}"
5323 [(set_attr "type" "sseimul")
5324 (set_attr "prefix_data16" "1")
5325 (set_attr "mode" "TI")])
;; Expander with the same operand shape as sse2_umulv2siv2di3 (V2DI result
;; from elements 0 and 2 of two V4SI inputs), used for the SSE4.1 / AVX
;; pmuldq patterns that follow.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy for MULT in V4SImode.
5327 (define_expand "sse4_1_mulv2siv2di3"
5328 [(set (match_operand:V2DI 0 "register_operand" "")
5332 (match_operand:V4SI 1 "nonimmediate_operand" "")
5333 (parallel [(const_int 0) (const_int 2)])))
5336 (match_operand:V4SI 2 "nonimmediate_operand" "")
5337 (parallel [(const_int 0) (const_int 2)])))))]
5339 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; AVX insn for the V4SI -> V2DI multiply of elements 0 and 2, printed as
;; vpmuldq with three operands.  Sets prefix_extra to 1 in addition to the
;; vex prefix attribute.
5341 (define_insn "*avx_mulv2siv2di3"
5342 [(set (match_operand:V2DI 0 "register_operand" "=x")
5346 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5347 (parallel [(const_int 0) (const_int 2)])))
5350 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5351 (parallel [(const_int 0) (const_int 2)])))))]
5352 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5353 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5354 [(set_attr "type" "sseimul")
5355 (set_attr "prefix_extra" "1")
5356 (set_attr "prefix" "vex")
5357 (set_attr "mode" "TI")])
;; SSE4.1 insn for the V4SI -> V2DI multiply of elements 0 and 2, printed
;; as pmuldq.  Operand 1 is tied to the destination ("%0"); operand 2 may
;; be a register or memory.
5359 (define_insn "*sse4_1_mulv2siv2di3"
5360 [(set (match_operand:V2DI 0 "register_operand" "=x")
5364 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5365 (parallel [(const_int 0) (const_int 2)])))
5368 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5369 (parallel [(const_int 0) (const_int 2)])))))]
5370 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5371 "pmuldq\t{%2, %0|%0, %2}"
5372 [(set_attr "type" "sseimul")
5373 (set_attr "prefix_extra" "1")
5374 (set_attr "mode" "TI")])
;; Expander for pmaddwd: V4SI result built from V4HI selections of the two
;; V8HI inputs.  One pair of selections starts at element 0 and the other
;; (on match_dup 1 / match_dup 2) starts at element 1 and ends at element 7.
;; The preparation statement runs ix86_fixup_binary_operands_no_copy for
;; MULT in V8HImode.
5376 (define_expand "sse2_pmaddwd"
5377 [(set (match_operand:V4SI 0 "register_operand" "")
5382 (match_operand:V8HI 1 "nonimmediate_operand" "")
5383 (parallel [(const_int 0)
5389 (match_operand:V8HI 2 "nonimmediate_operand" "")
5390 (parallel [(const_int 0)
5396 (vec_select:V4HI (match_dup 1)
5397 (parallel [(const_int 1)
5402 (vec_select:V4HI (match_dup 2)
5403 (parallel [(const_int 1)
5406 (const_int 7)]))))))]
5408 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX insn matching the pmaddwd RTL shape (V4HI selections of two V8HI
;; inputs combined into a V4SI result), printed as vpmaddwd with three
;; operands.  Operand 1 is marked commutative ("%x").
5410 (define_insn "*avx_pmaddwd"
5411 [(set (match_operand:V4SI 0 "register_operand" "=x")
5416 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5417 (parallel [(const_int 0)
5423 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5424 (parallel [(const_int 0)
5430 (vec_select:V4HI (match_dup 1)
5431 (parallel [(const_int 1)
5436 (vec_select:V4HI (match_dup 2)
5437 (parallel [(const_int 1)
5440 (const_int 7)]))))))]
5441 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5442 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5443 [(set_attr "type" "sseiadd")
5444 (set_attr "prefix" "vex")
5445 (set_attr "mode" "TI")])
;; SSE2 insn matching the pmaddwd RTL shape, printed as pmaddwd.  Operand 1
;; is tied to the destination ("%0").  Unlike the AVX variant this pattern
;; also sets the atom_unit attribute to "simul".
5447 (define_insn "*sse2_pmaddwd"
5448 [(set (match_operand:V4SI 0 "register_operand" "=x")
5453 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5454 (parallel [(const_int 0)
5460 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5461 (parallel [(const_int 0)
5467 (vec_select:V4HI (match_dup 1)
5468 (parallel [(const_int 1)
5473 (vec_select:V4HI (match_dup 2)
5474 (parallel [(const_int 1)
5477 (const_int 7)]))))))]
5478 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5479 "pmaddwd\t{%2, %0|%0, %2}"
5480 [(set_attr "type" "sseiadd")
5481 (set_attr "atom_unit" "simul")
5482 (set_attr "prefix_data16" "1")
5483 (set_attr "mode" "TI")])
;; Expander for the V4SI multiply on register operands.  The operand fixup
;; (ix86_fixup_binary_operands_no_copy) is applied only when TARGET_SSE4_1
;; or TARGET_AVX is set.
5485 (define_expand "mulv4si3"
5486 [(set (match_operand:V4SI 0 "register_operand" "")
5487 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5488 (match_operand:V4SI 2 "register_operand" "")))]
5491 if (TARGET_SSE4_1 || TARGET_AVX)
5492 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; AVX V4SI multiply, printed as vpmulld with three operands.  Operand 1 is
;; marked commutative ("%x"); operand 2 may be a register or memory.
5495 (define_insn "*avx_mulv4si3"
5496 [(set (match_operand:V4SI 0 "register_operand" "=x")
5497 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5498 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5499 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5500 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5501 [(set_attr "type" "sseimul")
5502 (set_attr "prefix_extra" "1")
5503 (set_attr "prefix" "vex")
5504 (set_attr "mode" "TI")])
;; SSE4.1 V4SI multiply, printed as pmulld.  Operand 1 is tied to the
;; destination ("%0"); operand 2 may be a register or memory.
5506 (define_insn "*sse4_1_mulv4si3"
5507 [(set (match_operand:V4SI 0 "register_operand" "=x")
5508 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5509 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5510 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5511 "pmulld\t{%2, %0|%0, %2}"
5512 [(set_attr "type" "sseimul")
5513 (set_attr "prefix_extra" "1")
5514 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets without SSE4.1 or AVX, implemented as a
;; split while pseudos can still be created.  Elements 0 and 2 are
;; multiplied with sse2_umulv2siv2di3, both inputs are shifted as V1TI
;; values so elements 1 and 3 move into those slots and are multiplied the
;; same way, then pshufd and a low interleave reassemble the four results
;; into operand 0.
5516 (define_insn_and_split "*sse2_mulv4si3"
5517 [(set (match_operand:V4SI 0 "register_operand" "")
5518 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5519 (match_operand:V4SI 2 "register_operand" "")))]
5520 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5521 && can_create_pseudo_p ()"
5526 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5532 t1 = gen_reg_rtx (V4SImode);
5533 t2 = gen_reg_rtx (V4SImode);
5534 t3 = gen_reg_rtx (V4SImode);
5535 t4 = gen_reg_rtx (V4SImode);
5536 t5 = gen_reg_rtx (V4SImode);
5537 t6 = gen_reg_rtx (V4SImode);
5538 thirtytwo = GEN_INT (32);
5540 /* Multiply elements 2 and 0. */
5541 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5544 /* Shift both input vectors down one element, so that elements 3
5545 and 1 are now in the slots for elements 2 and 0. For K8, at
5546 least, this is faster than using a shuffle. */
5547 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5548 gen_lowpart (V1TImode, op1),
5550 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5551 gen_lowpart (V1TImode, op2),
5553 /* Multiply elements 3 and 1. */
5554 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5557 /* Move the results in element 2 down to element 1; we don't care
5558 what goes in elements 2 and 3. */
5559 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5560 const0_rtx, const0_rtx));
5561 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5562 const0_rtx, const0_rtx));
5564 /* Merge the parts back together. */
5565 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; V2DI multiply, implemented as a split while pseudos can be created.
;; Two expansion sequences are present.  The first works on V4SI views of
;; the inputs and uses the XOP generators (gen_xop_phadddq,
;; gen_xop_pmacsdql): cross products, horizontal add, shift left by 32,
;; then multiply-accumulate of the low parts.  The second uses only SSE2
;; generators: low*low, the two high*low cross products (inputs shifted
;; right by 32 to expose the high halves), the cross products shifted left
;; by 32, and two V2DI adds to sum the three parts.
5569 (define_insn_and_split "mulv2di3"
5570 [(set (match_operand:V2DI 0 "register_operand" "")
5571 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5572 (match_operand:V2DI 2 "register_operand" "")))]
5574 && can_create_pseudo_p ()"
5579 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5588 /* op1: A,B,C,D, op2: E,F,G,H */
5589 op1 = gen_lowpart (V4SImode, op1);
5590 op2 = gen_lowpart (V4SImode, op2);
5592 t1 = gen_reg_rtx (V4SImode);
5593 t2 = gen_reg_rtx (V4SImode);
5594 t3 = gen_reg_rtx (V2DImode);
5595 t4 = gen_reg_rtx (V2DImode);
5598 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5604 /* t2: (B*E),(A*F),(D*G),(C*H) */
5605 emit_insn (gen_mulv4si3 (t2, t1, op2));
5607 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5608 emit_insn (gen_xop_phadddq (t3, t2));
5610 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5611 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5613 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5614 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5618 t1 = gen_reg_rtx (V2DImode);
5619 t2 = gen_reg_rtx (V2DImode);
5620 t3 = gen_reg_rtx (V2DImode);
5621 t4 = gen_reg_rtx (V2DImode);
5622 t5 = gen_reg_rtx (V2DImode);
5623 t6 = gen_reg_rtx (V2DImode);
5624 thirtytwo = GEN_INT (32);
5626 /* Multiply low parts. */
5627 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5628 gen_lowpart (V4SImode, op2)));
5630 /* Shift input vectors right 32 bits so we can multiply high parts. */
5631 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5632 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5634 /* Multiply high parts by low parts. */
5635 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5636 gen_lowpart (V4SImode, t3)));
5637 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5638 gen_lowpart (V4SImode, t2)));
5640 /* Shift them back. */
5641 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5642 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5644 /* Add the three parts together. */
5645 emit_insn (gen_addv2di3 (t6, t1, t4));
5646 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening signed multiply, high half: V8HI x V8HI -> V4SI.  Emits the
;; plain V8HI product (mulv8hi3) and the signed high-part product
;; (smulv8hi3_highpart) into two temporaries, then interleaves their high
;; halves into operand 0 viewed as V8HI.
5651 (define_expand "vec_widen_smult_hi_v8hi"
5652 [(match_operand:V4SI 0 "register_operand" "")
5653 (match_operand:V8HI 1 "register_operand" "")
5654 (match_operand:V8HI 2 "register_operand" "")]
5657 rtx op1, op2, t1, t2, dest;
5661 t1 = gen_reg_rtx (V8HImode);
5662 t2 = gen_reg_rtx (V8HImode);
5663 dest = gen_lowpart (V8HImode, operands[0]);
5665 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5666 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5667 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening signed multiply, low half: V8HI x V8HI -> V4SI.  Same sequence
;; as the _hi_ variant (mulv8hi3 plus smulv8hi3_highpart) but the two
;; temporaries are combined with gen_vec_interleave_lowv8hi.
5671 (define_expand "vec_widen_smult_lo_v8hi"
5672 [(match_operand:V4SI 0 "register_operand" "")
5673 (match_operand:V8HI 1 "register_operand" "")
5674 (match_operand:V8HI 2 "register_operand" "")]
5677 rtx op1, op2, t1, t2, dest;
5681 t1 = gen_reg_rtx (V8HImode);
5682 t2 = gen_reg_rtx (V8HImode);
5683 dest = gen_lowpart (V8HImode, operands[0]);
5685 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5686 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5687 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening unsigned multiply, high half: V8HI x V8HI -> V4SI.  Emits
;; mulv8hi3 and umulv8hi3_highpart into two temporaries and interleaves
;; their high halves into operand 0 viewed as V8HI.
5691 (define_expand "vec_widen_umult_hi_v8hi"
5692 [(match_operand:V4SI 0 "register_operand" "")
5693 (match_operand:V8HI 1 "register_operand" "")
5694 (match_operand:V8HI 2 "register_operand" "")]
5697 rtx op1, op2, t1, t2, dest;
5701 t1 = gen_reg_rtx (V8HImode);
5702 t2 = gen_reg_rtx (V8HImode);
5703 dest = gen_lowpart (V8HImode, operands[0]);
5705 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5706 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5707 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening unsigned multiply, low half: V8HI x V8HI -> V4SI.  Emits
;; mulv8hi3 and umulv8hi3_highpart into two temporaries and interleaves
;; their low halves into operand 0 viewed as V8HI.
5711 (define_expand "vec_widen_umult_lo_v8hi"
5712 [(match_operand:V4SI 0 "register_operand" "")
5713 (match_operand:V8HI 1 "register_operand" "")
5714 (match_operand:V8HI 2 "register_operand" "")]
5717 rtx op1, op2, t1, t2, dest;
5721 t1 = gen_reg_rtx (V8HImode);
5722 t2 = gen_reg_rtx (V8HImode);
5723 dest = gen_lowpart (V8HImode, operands[0]);
5725 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5726 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5727 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening signed multiply, high half: V4SI x V4SI -> V2DI.  Each input is
;; first permuted with sse2_pshufd_1 into a V4SI temporary, then the two
;; temporaries are passed to gen_xop_mulv2div2di3_high, which writes
;; operand 0.
5731 (define_expand "vec_widen_smult_hi_v4si"
5732 [(match_operand:V2DI 0 "register_operand" "")
5733 (match_operand:V4SI 1 "register_operand" "")
5734 (match_operand:V4SI 2 "register_operand" "")]
5739 t1 = gen_reg_rtx (V4SImode);
5740 t2 = gen_reg_rtx (V4SImode);
5742 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5747 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5752 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Widening signed multiply, low half: V4SI x V4SI -> V2DI.  Each input is
;; first permuted with sse2_pshufd_1 into a V4SI temporary, then the two
;; temporaries are passed to gen_xop_mulv2div2di3_low, which writes
;; operand 0.
5756 (define_expand "vec_widen_smult_lo_v4si"
5757 [(match_operand:V2DI 0 "register_operand" "")
5758 (match_operand:V4SI 1 "register_operand" "")
5759 (match_operand:V4SI 2 "register_operand" "")]
5764 t1 = gen_reg_rtx (V4SImode);
5765 t2 = gen_reg_rtx (V4SImode);
5767 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5772 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5777 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Widening unsigned multiply, high half: V4SI x V4SI -> V2DI.  Each input
;; is interleaved with itself (high halves) into a temporary, and the two
;; temporaries are multiplied with sse2_umulv2siv2di3 into operand 0.
5781 (define_expand "vec_widen_umult_hi_v4si"
5782 [(match_operand:V2DI 0 "register_operand" "")
5783 (match_operand:V4SI 1 "register_operand" "")
5784 (match_operand:V4SI 2 "register_operand" "")]
5787 rtx op1, op2, t1, t2;
5791 t1 = gen_reg_rtx (V4SImode);
5792 t2 = gen_reg_rtx (V4SImode);
5794 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5795 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5796 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening unsigned multiply, low half: V4SI x V4SI -> V2DI.  Each input
;; is interleaved with itself (low halves) into a temporary, and the two
;; temporaries are multiplied with sse2_umulv2siv2di3 into operand 0.
5800 (define_expand "vec_widen_umult_lo_v4si"
5801 [(match_operand:V2DI 0 "register_operand" "")
5802 (match_operand:V4SI 1 "register_operand" "")
5803 (match_operand:V4SI 2 "register_operand" "")]
5806 rtx op1, op2, t1, t2;
5810 t1 = gen_reg_rtx (V4SImode);
5811 t2 = gen_reg_rtx (V4SImode);
5813 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5814 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5815 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot product with accumulate: operands 1 and 2 are V8HI inputs,
;; operand 3 is the V4SI accumulator.  Emits sse2_pmaddwd of the inputs
;; into a fresh V4SI temporary and adds it to operand 3, storing the sum
;; in operand 0.
5819 (define_expand "sdot_prodv8hi"
5820 [(match_operand:V4SI 0 "register_operand" "")
5821 (match_operand:V8HI 1 "register_operand" "")
5822 (match_operand:V8HI 2 "register_operand" "")
5823 (match_operand:V4SI 3 "register_operand" "")]
5826 rtx t = gen_reg_rtx (V4SImode);
5827 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5828 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot product with accumulate: operands 1 and 2 are V4SI inputs,
;; operand 3 is the V2DI accumulator.  Sequence:
;;   t1 = sse2_umulv2siv2di3 (op1, op2); t1 += op3
;;   t2, t3 = op1, op2 shifted right as V1TI values (sse2_lshrv1ti3)
;;   t4 = sse2_umulv2siv2di3 (t2, t3)
;;   operand 0 = t1 + t4
5832 (define_expand "udot_prodv4si"
5833 [(match_operand:V2DI 0 "register_operand" "")
5834 (match_operand:V4SI 1 "register_operand" "")
5835 (match_operand:V4SI 2 "register_operand" "")
5836 (match_operand:V2DI 3 "register_operand" "")]
5841 t1 = gen_reg_rtx (V2DImode);
5842 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5843 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5845 t2 = gen_reg_rtx (V4SImode);
5846 t3 = gen_reg_rtx (V4SImode);
5847 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5848 gen_lowpart (V1TImode, operands[1]),
5850 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5851 gen_lowpart (V1TImode, operands[2]),
5854 t4 = gen_reg_rtx (V2DImode);
5855 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5857 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; AVX shift on SSEMODE24 (V8HI, V4SI), printed as vpsra<ssevecsize> with
;; three operands.  The SImode shift count (operand 2) may be an SSE
;; register or a constant satisfying "N".  The length_immediate attribute
;; depends on whether operand 2 is a const_int.
5861 (define_insn "*avx_ashr<mode>3"
5862 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5864 (match_operand:SSEMODE24 1 "register_operand" "x")
5865 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5867 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5868 [(set_attr "type" "sseishft")
5869 (set_attr "prefix" "vex")
5870 (set (attr "length_immediate")
5871 (if_then_else (match_operand 2 "const_int_operand" "")
5873 (const_string "0")))
5874 (set_attr "mode" "TI")])
;; Shift on SSEMODE24 (V8HI, V4SI), printed as psra<ssevecsize>.  Operand 1
;; is tied to the destination ("0"); the SImode shift count may be an SSE
;; register or a constant satisfying "N".  The length_immediate attribute
;; depends on whether operand 2 is a const_int.
5876 (define_insn "ashr<mode>3"
5877 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5879 (match_operand:SSEMODE24 1 "register_operand" "0")
5880 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5882 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5883 [(set_attr "type" "sseishft")
5884 (set_attr "prefix_data16" "1")
5885 (set (attr "length_immediate")
5886 (if_then_else (match_operand 2 "const_int_operand" "")
5888 (const_string "0")))
5889 (set_attr "mode" "TI")])
;; AVX whole-register (V1TI) shift printed as vpsrldq.  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing, so the RTL count and the printed immediate differ by a
;; factor of eight.
5891 (define_insn "*avx_lshrv1ti3"
5892 [(set (match_operand:V1TI 0 "register_operand" "=x")
5894 (match_operand:V1TI 1 "register_operand" "x")
5895 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5898 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5899 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5901 [(set_attr "type" "sseishft")
5902 (set_attr "prefix" "vex")
5903 (set_attr "length_immediate" "1")
5904 (set_attr "mode" "TI")])
;; AVX logical right shift (lshiftrt) over the SSEMODE248 modes, printed
;; as vpsrl<ssevecsize> with three operands.  The SImode shift count may
;; be an SSE register or a constant satisfying "N"; length_immediate
;; depends on whether operand 2 is a const_int.
5906 (define_insn "*avx_lshr<mode>3"
5907 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5908 (lshiftrt:SSEMODE248
5909 (match_operand:SSEMODE248 1 "register_operand" "x")
5910 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5912 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5913 [(set_attr "type" "sseishft")
5914 (set_attr "prefix" "vex")
5915 (set (attr "length_immediate")
5916 (if_then_else (match_operand 2 "const_int_operand" "")
5918 (const_string "0")))
5919 (set_attr "mode" "TI")])
;; SSE2 whole-register (V1TI) shift printed as psrldq; operand 1 is tied to
;; the destination ("0").  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand and is divided by 8 before printing.
;; Named (no leading '*'), and called as gen_sse2_lshrv1ti3 by expanders
;; in this file.
5921 (define_insn "sse2_lshrv1ti3"
5922 [(set (match_operand:V1TI 0 "register_operand" "=x")
5924 (match_operand:V1TI 1 "register_operand" "0")
5925 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5928 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5929 return "psrldq\t{%2, %0|%0, %2}";
5931 [(set_attr "type" "sseishft")
5932 (set_attr "prefix_data16" "1")
5933 (set_attr "length_immediate" "1")
5934 (set_attr "atom_unit" "sishuf")
5935 (set_attr "mode" "TI")])
;; Logical right shift (lshiftrt) over the SSEMODE248 modes, printed as
;; psrl<ssevecsize>.  Operand 1 is tied to the destination ("0"); the
;; SImode shift count may be an SSE register or a constant satisfying "N".
;; length_immediate depends on whether operand 2 is a const_int.
5937 (define_insn "lshr<mode>3"
5938 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5939 (lshiftrt:SSEMODE248
5940 (match_operand:SSEMODE248 1 "register_operand" "0")
5941 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5943 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
5944 [(set_attr "type" "sseishft")
5945 (set_attr "prefix_data16" "1")
5946 (set (attr "length_immediate")
5947 (if_then_else (match_operand 2 "const_int_operand" "")
5949 (const_string "0")))
5950 (set_attr "mode" "TI")])
;; AVX whole-register (V1TI) left shift (ashift) printed as vpslldq.
;; Operand 2 must satisfy const_0_to_255_mul_8_operand and is divided by 8
;; before printing.
5952 (define_insn "*avx_ashlv1ti3"
5953 [(set (match_operand:V1TI 0 "register_operand" "=x")
5954 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "x")
5955 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5958 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5959 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5961 [(set_attr "type" "sseishft")
5962 (set_attr "prefix" "vex")
5963 (set_attr "length_immediate" "1")
5964 (set_attr "mode" "TI")])
;; AVX shift over the SSEMODE248 modes, printed as vpsll<ssevecsize> with
;; three operands.  The SImode shift count may be an SSE register or a
;; constant satisfying "N"; length_immediate depends on whether operand 2
;; is a const_int.
5966 (define_insn "*avx_ashl<mode>3"
5967 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5969 (match_operand:SSEMODE248 1 "register_operand" "x")
5970 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5972 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5973 [(set_attr "type" "sseishft")
5974 (set_attr "prefix" "vex")
5975 (set (attr "length_immediate")
5976 (if_then_else (match_operand 2 "const_int_operand" "")
5978 (const_string "0")))
5979 (set_attr "mode" "TI")])
;; SSE2 whole-register (V1TI) left shift (ashift) printed as pslldq;
;; operand 1 is tied to the destination ("0").  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand and is divided by 8 before printing.
5981 (define_insn "sse2_ashlv1ti3"
5982 [(set (match_operand:V1TI 0 "register_operand" "=x")
5983 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "0")
5984 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5987 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5988 return "pslldq\t{%2, %0|%0, %2}";
5990 [(set_attr "type" "sseishft")
5991 (set_attr "prefix_data16" "1")
5992 (set_attr "length_immediate" "1")
5993 (set_attr "mode" "TI")])
;; Shift over the SSEMODE248 modes, printed as psll<ssevecsize>.  Operand 1
;; is tied to the destination ("0"); the SImode shift count may be an SSE
;; register or a constant satisfying "N".  length_immediate depends on
;; whether operand 2 is a const_int.
5995 (define_insn "ashl<mode>3"
5996 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5998 (match_operand:SSEMODE248 1 "register_operand" "0")
5999 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6001 "psll<ssevecsize>\t{%2, %0|%0, %2}"
6002 [(set_attr "type" "sseishft")
6003 (set_attr "prefix_data16" "1")
6004 (set (attr "length_immediate")
6005 (if_then_else (match_operand 2 "const_int_operand" "")
6007 (const_string "0")))
6008 (set_attr "mode" "TI")])
;; Whole-vector shift expander for the SSEMODEI modes (V16QI, V8HI, V4SI,
;; V2DI).  The shift count must satisfy const_0_to_255_mul_8_operand.
;; Operands 0 and 1 are replaced by their V1TImode lowparts before the
;; pattern is emitted.
6010 (define_expand "vec_shl_<mode>"
6011 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6013 (match_operand:SSEMODEI 1 "register_operand" "")
6014 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6017 operands[0] = gen_lowpart (V1TImode, operands[0]);
6018 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Whole-vector shift expander for the SSEMODEI modes (V16QI, V8HI, V4SI,
;; V2DI), counterpart of vec_shl_<mode>.  The shift count must satisfy
;; const_0_to_255_mul_8_operand.  Operands 0 and 1 are replaced by their
;; V1TImode lowparts before the pattern is emitted.
6021 (define_expand "vec_shr_<mode>"
6022 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6024 (match_operand:SSEMODEI 1 "register_operand" "")
6025 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6028 operands[0] = gen_lowpart (V1TImode, operands[0]);
6029 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; AVX integer max/min over the SSEMODE124 modes, printed as
;; vp<maxmin_int><ssevecsize> with three operands.  Operand 1 is marked
;; commutative ("%x").  The prefix_extra attribute is chosen by testing
;; whether operand 0 has mode V16QI.
6032 (define_insn "*avx_<code><mode>3"
6033 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6035 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6036 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6037 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6038 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6039 [(set_attr "type" "sseiadd")
6040 (set (attr "prefix_extra")
6041 (if_then_else (match_operand:V16QI 0 "" "")
6043 (const_string "1")))
6044 (set_attr "prefix" "vex")
6045 (set_attr "mode" "TI")])
;; Expander for the V16QI max/min codes supplied by the <code> iterator.
;; The preparation statement runs ix86_fixup_binary_operands_no_copy for
;; <CODE> in V16QImode.
6047 (define_expand "<code>v16qi3"
6048 [(set (match_operand:V16QI 0 "register_operand" "")
6050 (match_operand:V16QI 1 "nonimmediate_operand" "")
6051 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
6053 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; SSE2 V16QI max/min insn, printed as p<maxmin_int>b.  Operand 1 is tied
;; to the destination ("%0"); operand 2 may be a register or memory.
6055 (define_insn "*<code>v16qi3"
6056 [(set (match_operand:V16QI 0 "register_operand" "=x")
6058 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
6059 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
6060 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6061 "p<maxmin_int>b\t{%2, %0|%0, %2}"
6062 [(set_attr "type" "sseiadd")
6063 (set_attr "prefix_data16" "1")
6064 (set_attr "mode" "TI")])
;; Second AVX integer max/min pattern over the SSEMODE124 modes, printed as
;; vp<maxmin_int><ssevecsize> with three operands.  It differs from the
;; earlier pattern of the same name in its prefix_extra test, which checks
;; whether operand 0 has mode V8HI.
6066 (define_insn "*avx_<code><mode>3"
6067 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6069 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6070 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6071 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6072 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6073 [(set_attr "type" "sseiadd")
6074 (set (attr "prefix_extra")
6075 (if_then_else (match_operand:V8HI 0 "" "")
6077 (const_string "1")))
6078 (set_attr "prefix" "vex")
6079 (set_attr "mode" "TI")])
;; Expander for the V8HI max/min codes supplied by the <code> iterator.
;; The preparation statement runs ix86_fixup_binary_operands_no_copy for
;; <CODE> in V8HImode.
6081 (define_expand "<code>v8hi3"
6082 [(set (match_operand:V8HI 0 "register_operand" "")
6084 (match_operand:V8HI 1 "nonimmediate_operand" "")
6085 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6087 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; SSE2 V8HI max/min insn, printed as p<maxmin_int>w.  Operand 1 is tied to
;; the destination ("%0"); operand 2 may be a register or memory.
6089 (define_insn "*<code>v8hi3"
6090 [(set (match_operand:V8HI 0 "register_operand" "=x")
6092 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
6093 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
6094 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6095 "p<maxmin_int>w\t{%2, %0|%0, %2}"
6096 [(set_attr "type" "sseiadd")
6097 (set_attr "prefix_data16" "1")
6098 (set_attr "mode" "TI")])
;; Expander for unsigned V8HI max.  The body contains two strategies: an
;; operand fixup for UMAX, and an emulation that emits sse2_ussubv8hi3
;; (operand 1 minus operand 2) followed by addv8hi3 adding operand 2 back.
;; In the emulation the intermediate normally reuses operand 0, but a fresh
;; pseudo is allocated when operand 0 is rtx_equal_p to operand 2, so
;; operand 2 is still intact for the add.
6100 (define_expand "umaxv8hi3"
6101 [(set (match_operand:V8HI 0 "register_operand" "")
6102 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
6103 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6107 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
6110 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6111 if (rtx_equal_p (op3, op2))
6112 op3 = gen_reg_rtx (V8HImode);
6113 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6114 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Expander for signed max on SSEMODE14 (V16QI, V4SI).  The body contains
;; an operand fixup for SMAX and a vector-conditional fallback: xops holds
;; the destination, operand 1 and operand 2 as the two value operands, a GT
;; comparison of operand 1 against operand 2, and the two comparison
;; operands; it is handed to ix86_expand_int_vcond.
6119 (define_expand "smax<mode>3"
6120 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6121 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6122 (match_operand:SSEMODE14 2 "register_operand" "")))]
6126 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
6132 xops[0] = operands[0];
6133 xops[1] = operands[1];
6134 xops[2] = operands[2];
6135 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6136 xops[4] = operands[1];
6137 xops[5] = operands[2];
6138 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 integer max/min insn over SSEMODE14 (V16QI, V4SI), printed as
;; p<maxmin_int><ssevecsize>.  Operand 1 is tied to the destination
;; ("%0"); operand 2 may be a register or memory.
6144 (define_insn "*sse4_1_<code><mode>3"
6145 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
6147 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
6148 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
6149 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6150 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6151 [(set_attr "type" "sseiadd")
6152 (set_attr "prefix_extra" "1")
6153 (set_attr "mode" "TI")])
;; Expander for signed V2DI max via a vector conditional: xops holds the
;; destination, operand 1 and operand 2 as the two value operands, a GT
;; comparison of operand 1 against operand 2, and the two comparison
;; operands; it is handed to ix86_expand_int_vcond.
6155 (define_expand "smaxv2di3"
6156 [(set (match_operand:V2DI 0 "register_operand" "")
6157 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
6158 (match_operand:V2DI 2 "register_operand" "")))]
6164 xops[0] = operands[0];
6165 xops[1] = operands[1];
6166 xops[2] = operands[2];
6167 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6168 xops[4] = operands[1];
6169 xops[5] = operands[2];
6170 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned V4SI max.  The body contains an operand fixup for
;; UMAX and a vector-conditional fallback that uses the unsigned GTU
;; comparison of operand 1 against operand 2, with operand 1 and operand 2
;; as the two value operands, passed to ix86_expand_int_vcond.
6175 (define_expand "umaxv4si3"
6176 [(set (match_operand:V4SI 0 "register_operand" "")
6177 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
6178 (match_operand:V4SI 2 "register_operand" "")))]
6182 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
6188 xops[0] = operands[0];
6189 xops[1] = operands[1];
6190 xops[2] = operands[2];
6191 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6192 xops[4] = operands[1];
6193 xops[5] = operands[2];
6194 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 integer max/min insn over SSEMODE24 (V8HI, V4SI), printed as
;; p<maxmin_int><ssevecsize>.  Operand 1 is tied to the destination
;; ("%0"); operand 2 may be a register or memory.
6200 (define_insn "*sse4_1_<code><mode>3"
6201 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6203 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
6204 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
6205 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6206 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6207 [(set_attr "type" "sseiadd")
6208 (set_attr "prefix_extra" "1")
6209 (set_attr "mode" "TI")])
;; Expander for unsigned V2DI max via a vector conditional: xops holds the
;; destination, operand 1 and operand 2 as the two value operands, a GTU
;; comparison of operand 1 against operand 2, and the two comparison
;; operands; it is handed to ix86_expand_int_vcond.
6211 (define_expand "umaxv2di3"
6212 [(set (match_operand:V2DI 0 "register_operand" "")
6213 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
6214 (match_operand:V2DI 2 "register_operand" "")))]
6220 xops[0] = operands[0];
6221 xops[1] = operands[1];
6222 xops[2] = operands[2];
6223 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6224 xops[4] = operands[1];
6225 xops[5] = operands[2];
6226 ok = ix86_expand_int_vcond (xops);
;; Expander for signed min on SSEMODE14 (V16QI, V4SI).  The body contains
;; an operand fixup for SMIN and a vector-conditional fallback.  Compared
;; with smax<mode>3 the two value operands are swapped (xops[1] is operand
;; 2, xops[2] is operand 1) while the GT comparison of operand 1 against
;; operand 2 is unchanged.
6231 (define_expand "smin<mode>3"
6232 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6233 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6234 (match_operand:SSEMODE14 2 "register_operand" "")))]
6238 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
6244 xops[0] = operands[0];
6245 xops[1] = operands[2];
6246 xops[2] = operands[1];
6247 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6248 xops[4] = operands[1];
6249 xops[5] = operands[2];
6250 ok = ix86_expand_int_vcond (xops);
;; Expander for signed V2DI min via a vector conditional.  The value
;; operands are swapped relative to smaxv2di3 (xops[1] is operand 2,
;; xops[2] is operand 1); the comparison is still GT of operand 1 against
;; operand 2, passed to ix86_expand_int_vcond.
6256 (define_expand "sminv2di3"
6257 [(set (match_operand:V2DI 0 "register_operand" "")
6258 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
6259 (match_operand:V2DI 2 "register_operand" "")))]
6265 xops[0] = operands[0];
6266 xops[1] = operands[2];
6267 xops[2] = operands[1];
6268 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6269 xops[4] = operands[1];
6270 xops[5] = operands[2];
6271 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned min on SSEMODE24 (V8HI, V4SI).  The body contains
;; an operand fixup for UMIN and a vector-conditional fallback using the
;; unsigned GTU comparison of operand 1 against operand 2, with the value
;; operands swapped (xops[1] is operand 2, xops[2] is operand 1).
6276 (define_expand "umin<mode>3"
6277 [(set (match_operand:SSEMODE24 0 "register_operand" "")
6278 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
6279 (match_operand:SSEMODE24 2 "register_operand" "")))]
6283 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
6289 xops[0] = operands[0];
6290 xops[1] = operands[2];
6291 xops[2] = operands[1];
6292 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6293 xops[4] = operands[1];
6294 xops[5] = operands[2];
6295 ok = ix86_expand_int_vcond (xops);
;; Expander for unsigned V2DI min via a vector conditional.  The value
;; operands are swapped (xops[1] is operand 2, xops[2] is operand 1) and
;; the comparison is GTU of operand 1 against operand 2, passed to
;; ix86_expand_int_vcond.
6301 (define_expand "uminv2di3"
6302 [(set (match_operand:V2DI 0 "register_operand" "")
6303 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
6304 (match_operand:V2DI 2 "register_operand" "")))]
6310 xops[0] = operands[0];
6311 xops[1] = operands[2];
6312 xops[2] = operands[1];
6313 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6314 xops[4] = operands[1];
6315 xops[5] = operands[2];
6316 ok = ix86_expand_int_vcond (xops);
6321 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6323 ;; Parallel integral comparisons
6325 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the integer equality comparison over the SSEMODE124 modes.
;; Enabled for TARGET_SSE2 when TARGET_XOP is not set; the preparation
;; statement runs ix86_fixup_binary_operands_no_copy for EQ.
6327 (define_expand "sse2_eq<mode>3"
6328 [(set (match_operand:SSEMODE124 0 "register_operand" "")
6330 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
6331 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
6332 "TARGET_SSE2 && !TARGET_XOP "
6333 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX integer equality comparison over the SSEMODE1248 modes, printed as
;; vpcmpeq<ssevecsize> with three operands.  Operand 1 is marked
;; commutative ("%x").  The prefix_extra attribute is chosen by testing
;; whether operand 0 has mode V2DI.
6335 (define_insn "*avx_eq<mode>3"
6336 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6338 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
6339 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6340 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6341 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6342 [(set_attr "type" "ssecmp")
6343 (set (attr "prefix_extra")
6344 (if_then_else (match_operand:V2DI 0 "" "")
6346 (const_string "*")))
6347 (set_attr "prefix" "vex")
6348 (set_attr "mode" "TI")])
;; SSE2 integer equality comparison over the SSEMODE124 modes, printed as
;; pcmpeq<ssevecsize>.  Enabled only when TARGET_XOP is not set.  Operand 1
;; is tied to the destination ("%0"); operand 2 may be a register or
;; memory.
6350 (define_insn "*sse2_eq<mode>3"
6351 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6353 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
6354 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6355 "TARGET_SSE2 && !TARGET_XOP
6356 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6357 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
6358 [(set_attr "type" "ssecmp")
6359 (set_attr "prefix_data16" "1")
6360 (set_attr "mode" "TI")])
;; Named expander for the V2DI equality compare.  Before the pattern is
;; emitted the operands are normalised with
;; ix86_fixup_binary_operands_no_copy using code EQ and V2DImode.
6362 (define_expand "sse4_1_eqv2di3"
6363 [(set (match_operand:V2DI 0 "register_operand" "")
6365 (match_operand:V2DI 1 "nonimmediate_operand" "")
6366 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6368 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.1 two-operand V2DI equality compare.  Operand 1 is tied to the
;; destination ("%0", commutative); operand 2 may be a register or memory.
;; Emits pcmpeqq and is valid only when ix86_binary_operator_ok accepts the
;; operands.
6370 (define_insn "*sse4_1_eqv2di3"
6371 [(set (match_operand:V2DI 0 "register_operand" "=x")
6373 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
6374 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6375 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6376 "pcmpeqq\t{%2, %0|%0, %2}"
6377 [(set_attr "type" "ssecmp")
6378 (set_attr "prefix_extra" "1")
6379 (set_attr "mode" "TI")])
;; AVX form of the greater-than compare over the SSEMODE1248 modes.  Emits
;; the three-operand vpcmpgt<ssevecsize>.  Unlike the equality patterns,
;; operand 1 carries no commutative marker and must be a register; operand
;; 2 may be a register or memory.  The prefix_extra attribute is computed
;; from whether operand 0 is V2DI.
6381 (define_insn "*avx_gt<mode>3"
6382 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6384 (match_operand:SSEMODE1248 1 "register_operand" "x")
6385 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6387 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6388 [(set_attr "type" "ssecmp")
6389 (set (attr "prefix_extra")
6390 (if_then_else (match_operand:V2DI 0 "" "")
6392 (const_string "*")))
6393 (set_attr "prefix" "vex")
6394 (set_attr "mode" "TI")])
;; SSE2 two-operand greater-than compare over the SSEMODE124 modes.
;; Operand 1 is a register tied to the destination ("0"); operand 2 may be
;; a register or memory.  Emits pcmpgt<ssevecsize>.  Enabled for SSE2 when
;; XOP is not in use.
6396 (define_insn "sse2_gt<mode>3"
6397 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6399 (match_operand:SSEMODE124 1 "register_operand" "0")
6400 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6401 "TARGET_SSE2 && !TARGET_XOP"
6402 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
6403 [(set_attr "type" "ssecmp")
6404 (set_attr "prefix_data16" "1")
6405 (set_attr "mode" "TI")])
;; Two-operand V2DI greater-than compare.  Operand 1 is a register tied to
;; the destination ("0"); operand 2 may be a register or memory.  Emits
;; pcmpgtq.
6407 (define_insn "sse4_2_gtv2di3"
6408 [(set (match_operand:V2DI 0 "register_operand" "=x")
6410 (match_operand:V2DI 1 "register_operand" "0")
6411 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6413 "pcmpgtq\t{%2, %0|%0, %2}"
6414 [(set_attr "type" "ssecmp")
6415 (set_attr "prefix_extra" "1")
6416 (set_attr "mode" "TI")])
;; Vector conditional-select expander over the SSEMODE124C8 modes.
;; Operand 3 is the comparison operator applied to operands 4 and 5;
;; operand 1 is the value chosen where the comparison holds and operand 2
;; the value chosen otherwise; operand 0 receives the result.  All of the
;; work is delegated to ix86_expand_int_vcond, whose success flag is kept
;; in "ok".
6418 (define_expand "vcond<mode>"
6419 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6420 (if_then_else:SSEMODE124C8
6421 (match_operator 3 ""
6422 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6423 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6424 (match_operand:SSEMODE124C8 1 "general_operand" "")
6425 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6428 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional-select expander with the same operand layout as
;; vcond<mode> (0 = result, 1/2 = selected values, 3 = comparison on
;; operands 4 and 5) and the same delegation to ix86_expand_int_vcond.
;; NOTE(review): presumably the entry point used for unsigned comparisons,
;; judging by the name only -- confirm against the optab documentation.
6433 (define_expand "vcondu<mode>"
6434 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6435 (if_then_else:SSEMODE124C8
6436 (match_operator 3 ""
6437 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6438 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6439 (match_operand:SSEMODE124C8 1 "general_operand" "")
6440 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6443 bool ok = ix86_expand_int_vcond (operands);
6448 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6450 ;; Parallel bitwise logical operations
6452 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's-complement expander for the 16-byte integer vector modes,
;; expressed as an xor of operand 1.  The preparation code builds a
;; CONST_VECTOR with GET_MODE_NUNITS elements, each constm1_rtx (all bits
;; set), forces it into a register and stores it as operands[2].
6454 (define_expand "one_cmpl<mode>2"
6455 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6456 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6460 int i, n = GET_MODE_NUNITS (<MODE>mode);
6461 rtvec v = rtvec_alloc (n);
6463 for (i = 0; i < n; ++i)
6464 RTVEC_ELT (v, i) = constm1_rtx;
6466 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; And-not for the 32-byte integer vector modes (AVX256MODEI).  Operand 1
;; is the input that appears under (not ...) and must be a register;
;; operand 2 may be a register or memory.  Emits the three-operand vandnps,
;; with the insn mode attribute taken from <avxvecpsmode>.
6469 (define_insn "*avx_andnot<mode>3"
6470 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6472 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6473 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6475 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6476 [(set_attr "type" "sselog")
6477 (set_attr "prefix" "vex")
6478 (set_attr "mode" "<avxvecpsmode>")])
;; And-not for the 16-byte integer vector modes on targets that have SSE
;; but not SSE2.  Operand 1 (the input under (not ...)) is tied to the
;; destination; operand 2 may be a register or memory.  Emits andnps, and
;; the insn mode attribute is V4SF.
6480 (define_insn "*sse_andnot<mode>3"
6481 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6483 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6484 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6485 "(TARGET_SSE && !TARGET_SSE2)"
6486 "andnps\t{%2, %0|%0, %2}"
6487 [(set_attr "type" "sselog")
6488 (set_attr "mode" "V4SF")])
;; And-not for the 16-byte integer vector modes (SSEMODEI), VEX-encoded.
;; Operand 1 (the input under (not ...)) must be a register; operand 2 may
;; be a register or memory.  Emits the three-operand vpandn.
6490 (define_insn "*avx_andnot<mode>3"
6491 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6493 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6494 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6496 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6497 [(set_attr "type" "sselog")
6498 (set_attr "prefix" "vex")
6499 (set_attr "mode" "TI")])
;; Two-operand and-not for the 16-byte integer vector modes.  Operand 1
;; (the input under (not ...)) is tied to the destination; operand 2 may be
;; a register or memory.  Emits pandn.
6501 (define_insn "sse2_andnot<mode>3"
6502 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6504 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6505 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6507 "pandn\t{%2, %0|%0, %2}"
6508 [(set_attr "type" "sselog")
6509 (set_attr "prefix_data16" "1")
6510 (set_attr "mode" "TI")])
;; And-not on TFmode values held in SSE registers.  Operand 1 (the input
;; under (not ...)) is tied to the destination; operand 2 may be a register
;; or memory.  Emits pandn, with insn mode attribute TI.
6512 (define_insn "*andnottf3"
6513 [(set (match_operand:TF 0 "register_operand" "=x")
6515 (not:TF (match_operand:TF 1 "register_operand" "0"))
6516 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6518 "pandn\t{%2, %0|%0, %2}"
6519 [(set_attr "type" "sselog")
6520 (set_attr "prefix_data16" "1")
6521 (set_attr "mode" "TI")])
;; Named expander for a binary operation, parameterised by a code iterator,
;; over the 16-byte integer vector modes.  The preparation statement
;; normalises the operands with ix86_fixup_binary_operands_no_copy for the
;; current <CODE>.
6523 (define_expand "<code><mode>3"
6524 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6526 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6527 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6529 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; any_logic operation on the 32-byte integer vector modes (AVX256MODEI).
;; Operand 1 is marked commutative ("%x"); operand 2 may be a register or
;; memory.  Emits the three-operand v<logic>ps, with the insn mode
;; attribute taken from <avxvecpsmode>.  The operands must be accepted by
;; ix86_binary_operator_ok.
6531 (define_insn "*avx_<code><mode>3"
6532 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6533 (any_logic:AVX256MODEI
6534 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6535 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6537 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6538 "v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
6539 [(set_attr "type" "sselog")
6540 (set_attr "prefix" "vex")
6541 (set_attr "mode" "<avxvecpsmode>")])
;; Binary logic operation on the 16-byte integer vector modes for targets
;; that have SSE but not SSE2.  Operand 1 is tied to the destination
;; ("%0", commutative); operand 2 may be a register or memory.  Emits
;; <logic>ps, and the insn mode attribute is V4SF.
6543 (define_insn "*sse_<code><mode>3"
6544 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6546 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6547 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6548 "(TARGET_SSE && !TARGET_SSE2)
6549 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6550 "<logic>ps\t{%2, %0|%0, %2}"
6551 [(set_attr "type" "sselog")
6552 (set_attr "mode" "V4SF")])
;; VEX-encoded binary logic operation on the 16-byte integer vector modes
;; (SSEMODEI).  Operand 1 is marked commutative ("%x"); operand 2 may be a
;; register or memory.  Emits the three-operand vp<logic>.  The operands
;; must be accepted by ix86_binary_operator_ok.
6554 (define_insn "*avx_<code><mode>3"
6555 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6557 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6558 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6560 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6561 "vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6562 [(set_attr "type" "sselog")
6563 (set_attr "prefix" "vex")
6564 (set_attr "mode" "TI")])
;; SSE2 two-operand binary logic operation on the 16-byte integer vector
;; modes.  Operand 1 is tied to the destination ("%0", commutative);
;; operand 2 may be a register or memory.  Emits p<logic>.
6566 (define_insn "*sse2_<code><mode>3"
6567 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6569 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6570 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6571 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6572 "p<logic>\t{%2, %0|%0, %2}"
6573 [(set_attr "type" "sselog")
6574 (set_attr "prefix_data16" "1")
6575 (set_attr "mode" "TI")])
;; Named expander for a binary operation, parameterised by a code iterator,
;; on TFmode.  The preparation statement normalises the operands with
;; ix86_fixup_binary_operands_no_copy for the current <CODE>.
6577 (define_expand "<code>tf3"
6578 [(set (match_operand:TF 0 "register_operand" "")
6580 (match_operand:TF 1 "nonimmediate_operand" "")
6581 (match_operand:TF 2 "nonimmediate_operand" "")))]
6583 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; SSE2 two-operand binary logic operation on TFmode values.  Operand 1 is
;; tied to the destination ("%0", commutative); operand 2 may be a register
;; or memory.  Emits p<logic>, with insn mode attribute TI.
6585 (define_insn "*<code>tf3"
6586 [(set (match_operand:TF 0 "register_operand" "=x")
6588 (match_operand:TF 1 "nonimmediate_operand" "%0")
6589 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6590 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6591 "p<logic>\t{%2, %0|%0, %2}"
6592 [(set_attr "type" "sselog")
6593 (set_attr "prefix_data16" "1")
6594 (set_attr "mode" "TI")])
6596 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6598 ;; Parallel integral element swizzling
6600 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two V8HI inputs into one V16QI result.  Both inputs
;; are reinterpreted as V16QI with gen_lowpart and the result is produced
;; by ix86_expand_vec_extract_even_odd with a last argument of 0.
;; NOTE(review): presumably 0 selects the even-numbered elements, i.e. the
;; low byte of each halfword -- confirm against the helper.
6602 (define_expand "vec_pack_trunc_v8hi"
6603 [(match_operand:V16QI 0 "register_operand" "")
6604 (match_operand:V8HI 1 "register_operand" "")
6605 (match_operand:V8HI 2 "register_operand" "")]
6608 rtx op1 = gen_lowpart (V16QImode, operands[1]);
6609 rtx op2 = gen_lowpart (V16QImode, operands[2]);
6610 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Truncating pack of two V4SI inputs into one V8HI result.  Both inputs
;; are reinterpreted as V8HI with gen_lowpart and the result is produced
;; by ix86_expand_vec_extract_even_odd with a last argument of 0.
;; NOTE(review): presumably 0 selects the even-numbered elements, i.e. the
;; low halfword of each dword -- confirm against the helper.
6614 (define_expand "vec_pack_trunc_v4si"
6615 [(match_operand:V8HI 0 "register_operand" "")
6616 (match_operand:V4SI 1 "register_operand" "")
6617 (match_operand:V4SI 2 "register_operand" "")]
6620 rtx op1 = gen_lowpart (V8HImode, operands[1]);
6621 rtx op2 = gen_lowpart (V8HImode, operands[2]);
6622 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Truncating pack of two V2DI inputs into one V4SI result.  Both inputs
;; are reinterpreted as V4SI with gen_lowpart and the result is produced
;; by ix86_expand_vec_extract_even_odd with a last argument of 0.
;; NOTE(review): presumably 0 selects the even-numbered elements, i.e. the
;; low dword of each qword -- confirm against the helper.
6626 (define_expand "vec_pack_trunc_v2di"
6627 [(match_operand:V4SI 0 "register_operand" "")
6628 (match_operand:V2DI 1 "register_operand" "")
6629 (match_operand:V2DI 2 "register_operand" "")]
6632 rtx op1 = gen_lowpart (V4SImode, operands[1]);
6633 rtx op2 = gen_lowpart (V4SImode, operands[2]);
6634 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; VEX-encoded pack of two V8HI inputs into a V16QI destination.  Operand 1
;; must be a register; operand 2 may be a register or memory.  Emits the
;; three-operand vpacksswb.
6638 (define_insn "*avx_packsswb"
6639 [(set (match_operand:V16QI 0 "register_operand" "=x")
6642 (match_operand:V8HI 1 "register_operand" "x"))
6644 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6646 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6647 [(set_attr "type" "sselog")
6648 (set_attr "prefix" "vex")
6649 (set_attr "mode" "TI")])
;; Two-operand pack of two V8HI inputs into a V16QI destination.  Operand 1
;; is tied to the destination ("0"); operand 2 may be a register or memory.
;; Emits packsswb.
6651 (define_insn "sse2_packsswb"
6652 [(set (match_operand:V16QI 0 "register_operand" "=x")
6655 (match_operand:V8HI 1 "register_operand" "0"))
6657 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6659 "packsswb\t{%2, %0|%0, %2}"
6660 [(set_attr "type" "sselog")
6661 (set_attr "prefix_data16" "1")
6662 (set_attr "mode" "TI")])
;; VEX-encoded pack of two V4SI inputs into a V8HI destination.  Operand 1
;; must be a register; operand 2 may be a register or memory.  Emits the
;; three-operand vpackssdw.
6664 (define_insn "*avx_packssdw"
6665 [(set (match_operand:V8HI 0 "register_operand" "=x")
6668 (match_operand:V4SI 1 "register_operand" "x"))
6670 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6672 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6673 [(set_attr "type" "sselog")
6674 (set_attr "prefix" "vex")
6675 (set_attr "mode" "TI")])
;; Two-operand pack of two V4SI inputs into a V8HI destination.  Operand 1
;; is tied to the destination ("0"); operand 2 may be a register or memory.
;; Emits packssdw.
6677 (define_insn "sse2_packssdw"
6678 [(set (match_operand:V8HI 0 "register_operand" "=x")
6681 (match_operand:V4SI 1 "register_operand" "0"))
6683 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6685 "packssdw\t{%2, %0|%0, %2}"
6686 [(set_attr "type" "sselog")
6687 (set_attr "prefix_data16" "1")
6688 (set_attr "mode" "TI")])
;; VEX-encoded pack of two V8HI inputs into a V16QI destination.  Operand 1
;; must be a register; operand 2 may be a register or memory.  Emits the
;; three-operand vpackuswb.
6690 (define_insn "*avx_packuswb"
6691 [(set (match_operand:V16QI 0 "register_operand" "=x")
6694 (match_operand:V8HI 1 "register_operand" "x"))
6696 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6698 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6699 [(set_attr "type" "sselog")
6700 (set_attr "prefix" "vex")
6701 (set_attr "mode" "TI")])
;; Two-operand pack of two V8HI inputs into a V16QI destination.  Operand 1
;; is tied to the destination ("0"); operand 2 may be a register or memory.
;; Emits packuswb.
6703 (define_insn "sse2_packuswb"
6704 [(set (match_operand:V16QI 0 "register_operand" "=x")
6707 (match_operand:V8HI 1 "register_operand" "0"))
6709 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6711 "packuswb\t{%2, %0|%0, %2}"
6712 [(set_attr "type" "sselog")
6713 (set_attr "prefix_data16" "1")
6714 (set_attr "mode" "TI")])
;; VEX-encoded high-half byte interleave of two V16QI inputs.  The
;; selection list alternates indices 8..15 with indices 24..31, in that
;; order.  Operand 1 must be a register; operand 2 may be a register or
;; memory.  Emits the three-operand vpunpckhbw.
6716 (define_insn "*avx_interleave_highv16qi"
6717 [(set (match_operand:V16QI 0 "register_operand" "=x")
6720 (match_operand:V16QI 1 "register_operand" "x")
6721 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6722 (parallel [(const_int 8) (const_int 24)
6723 (const_int 9) (const_int 25)
6724 (const_int 10) (const_int 26)
6725 (const_int 11) (const_int 27)
6726 (const_int 12) (const_int 28)
6727 (const_int 13) (const_int 29)
6728 (const_int 14) (const_int 30)
6729 (const_int 15) (const_int 31)])))]
6731 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6732 [(set_attr "type" "sselog")
6733 (set_attr "prefix" "vex")
6734 (set_attr "mode" "TI")])
;; Two-operand high-half byte interleave of two V16QI inputs.  The
;; selection list alternates indices 8..15 with indices 24..31, in that
;; order.  Operand 1 is tied to the destination ("0"); operand 2 may be a
;; register or memory.  Emits punpckhbw.
6736 (define_insn "vec_interleave_highv16qi"
6737 [(set (match_operand:V16QI 0 "register_operand" "=x")
6740 (match_operand:V16QI 1 "register_operand" "0")
6741 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6742 (parallel [(const_int 8) (const_int 24)
6743 (const_int 9) (const_int 25)
6744 (const_int 10) (const_int 26)
6745 (const_int 11) (const_int 27)
6746 (const_int 12) (const_int 28)
6747 (const_int 13) (const_int 29)
6748 (const_int 14) (const_int 30)
6749 (const_int 15) (const_int 31)])))]
6751 "punpckhbw\t{%2, %0|%0, %2}"
6752 [(set_attr "type" "sselog")
6753 (set_attr "prefix_data16" "1")
6754 (set_attr "mode" "TI")])
;; VEX-encoded low-half byte interleave of two V16QI inputs.  The
;; selection list alternates indices 0..7 with indices 16..23, in that
;; order.  Operand 1 must be a register; operand 2 may be a register or
;; memory.  Emits the three-operand vpunpcklbw.
6756 (define_insn "*avx_interleave_lowv16qi"
6757 [(set (match_operand:V16QI 0 "register_operand" "=x")
6760 (match_operand:V16QI 1 "register_operand" "x")
6761 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6762 (parallel [(const_int 0) (const_int 16)
6763 (const_int 1) (const_int 17)
6764 (const_int 2) (const_int 18)
6765 (const_int 3) (const_int 19)
6766 (const_int 4) (const_int 20)
6767 (const_int 5) (const_int 21)
6768 (const_int 6) (const_int 22)
6769 (const_int 7) (const_int 23)])))]
6771 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6772 [(set_attr "type" "sselog")
6773 (set_attr "prefix" "vex")
6774 (set_attr "mode" "TI")])
;; Two-operand low-half byte interleave of two V16QI inputs.  The
;; selection list alternates indices 0..7 with indices 16..23, in that
;; order.  Operand 1 is tied to the destination ("0"); operand 2 may be a
;; register or memory.  Emits punpcklbw.
6776 (define_insn "vec_interleave_lowv16qi"
6777 [(set (match_operand:V16QI 0 "register_operand" "=x")
6780 (match_operand:V16QI 1 "register_operand" "0")
6781 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6782 (parallel [(const_int 0) (const_int 16)
6783 (const_int 1) (const_int 17)
6784 (const_int 2) (const_int 18)
6785 (const_int 3) (const_int 19)
6786 (const_int 4) (const_int 20)
6787 (const_int 5) (const_int 21)
6788 (const_int 6) (const_int 22)
6789 (const_int 7) (const_int 23)])))]
6791 "punpcklbw\t{%2, %0|%0, %2}"
6792 [(set_attr "type" "sselog")
6793 (set_attr "prefix_data16" "1")
6794 (set_attr "mode" "TI")])
;; VEX-encoded high-half halfword interleave of two V8HI inputs.  The
;; selection list alternates indices 4..7 with indices 12..15.  Operand 1
;; must be a register; operand 2 may be a register or memory.  Emits the
;; three-operand vpunpckhwd.
6796 (define_insn "*avx_interleave_highv8hi"
6797 [(set (match_operand:V8HI 0 "register_operand" "=x")
6800 (match_operand:V8HI 1 "register_operand" "x")
6801 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6802 (parallel [(const_int 4) (const_int 12)
6803 (const_int 5) (const_int 13)
6804 (const_int 6) (const_int 14)
6805 (const_int 7) (const_int 15)])))]
6807 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6808 [(set_attr "type" "sselog")
6809 (set_attr "prefix" "vex")
6810 (set_attr "mode" "TI")])
;; Two-operand high-half halfword interleave of two V8HI inputs.  The
;; selection list alternates indices 4..7 with indices 12..15.  Operand 1
;; is tied to the destination ("0"); operand 2 may be a register or memory.
;; Emits punpckhwd.
6812 (define_insn "vec_interleave_highv8hi"
6813 [(set (match_operand:V8HI 0 "register_operand" "=x")
6816 (match_operand:V8HI 1 "register_operand" "0")
6817 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6818 (parallel [(const_int 4) (const_int 12)
6819 (const_int 5) (const_int 13)
6820 (const_int 6) (const_int 14)
6821 (const_int 7) (const_int 15)])))]
6823 "punpckhwd\t{%2, %0|%0, %2}"
6824 [(set_attr "type" "sselog")
6825 (set_attr "prefix_data16" "1")
6826 (set_attr "mode" "TI")])
;; VEX-encoded low-half halfword interleave of two V8HI inputs.  The
;; selection list alternates indices 0..3 with indices 8..11.  Operand 1
;; must be a register; operand 2 may be a register or memory.  Emits the
;; three-operand vpunpcklwd.
6828 (define_insn "*avx_interleave_lowv8hi"
6829 [(set (match_operand:V8HI 0 "register_operand" "=x")
6832 (match_operand:V8HI 1 "register_operand" "x")
6833 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6834 (parallel [(const_int 0) (const_int 8)
6835 (const_int 1) (const_int 9)
6836 (const_int 2) (const_int 10)
6837 (const_int 3) (const_int 11)])))]
6839 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6840 [(set_attr "type" "sselog")
6841 (set_attr "prefix" "vex")
6842 (set_attr "mode" "TI")])
;; Two-operand low-half halfword interleave of two V8HI inputs.  The
;; selection list alternates indices 0..3 with indices 8..11.  Operand 1 is
;; tied to the destination ("0"); operand 2 may be a register or memory.
;; Emits punpcklwd.
6844 (define_insn "vec_interleave_lowv8hi"
6845 [(set (match_operand:V8HI 0 "register_operand" "=x")
6848 (match_operand:V8HI 1 "register_operand" "0")
6849 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6850 (parallel [(const_int 0) (const_int 8)
6851 (const_int 1) (const_int 9)
6852 (const_int 2) (const_int 10)
6853 (const_int 3) (const_int 11)])))]
6855 "punpcklwd\t{%2, %0|%0, %2}"
6856 [(set_attr "type" "sselog")
6857 (set_attr "prefix_data16" "1")
6858 (set_attr "mode" "TI")])
;; VEX-encoded high-half dword interleave of two V4SI inputs.  The
;; selection list is 2, 6, 3, 7.  Operand 1 must be a register; operand 2
;; may be a register or memory.  Emits the three-operand vpunpckhdq.
6860 (define_insn "*avx_interleave_highv4si"
6861 [(set (match_operand:V4SI 0 "register_operand" "=x")
6864 (match_operand:V4SI 1 "register_operand" "x")
6865 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6866 (parallel [(const_int 2) (const_int 6)
6867 (const_int 3) (const_int 7)])))]
6869 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6870 [(set_attr "type" "sselog")
6871 (set_attr "prefix" "vex")
6872 (set_attr "mode" "TI")])
;; Two-operand high-half dword interleave of two V4SI inputs.  The
;; selection list is 2, 6, 3, 7.  Operand 1 is tied to the destination
;; ("0"); operand 2 may be a register or memory.  Emits punpckhdq.
6874 (define_insn "vec_interleave_highv4si"
6875 [(set (match_operand:V4SI 0 "register_operand" "=x")
6878 (match_operand:V4SI 1 "register_operand" "0")
6879 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6880 (parallel [(const_int 2) (const_int 6)
6881 (const_int 3) (const_int 7)])))]
6883 "punpckhdq\t{%2, %0|%0, %2}"
6884 [(set_attr "type" "sselog")
6885 (set_attr "prefix_data16" "1")
6886 (set_attr "mode" "TI")])
;; VEX-encoded low-half dword interleave of two V4SI inputs.  The
;; selection list is 0, 4, 1, 5.  Operand 1 must be a register; operand 2
;; may be a register or memory.  Emits the three-operand vpunpckldq.
6888 (define_insn "*avx_interleave_lowv4si"
6889 [(set (match_operand:V4SI 0 "register_operand" "=x")
6892 (match_operand:V4SI 1 "register_operand" "x")
6893 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6894 (parallel [(const_int 0) (const_int 4)
6895 (const_int 1) (const_int 5)])))]
6897 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6898 [(set_attr "type" "sselog")
6899 (set_attr "prefix" "vex")
6900 (set_attr "mode" "TI")])
;; Two-operand low-half dword interleave of two V4SI inputs.  The
;; selection list is 0, 4, 1, 5.  Operand 1 is tied to the destination
;; ("0"); operand 2 may be a register or memory.  Emits punpckldq.
6902 (define_insn "vec_interleave_lowv4si"
6903 [(set (match_operand:V4SI 0 "register_operand" "=x")
6906 (match_operand:V4SI 1 "register_operand" "0")
6907 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6908 (parallel [(const_int 0) (const_int 4)
6909 (const_int 1) (const_int 5)])))]
6911 "punpckldq\t{%2, %0|%0, %2}"
6912 [(set_attr "type" "sselog")
6913 (set_attr "prefix_data16" "1")
6914 (set_attr "mode" "TI")])
;; VEX-encoded element insert for the SSEMODE124 modes, written as a
;; vec_merge of a broadcast scalar (operand 2, general register or memory)
;; into vector operand 1 under the single-bit mask in operand 3.
;; The output code replaces the mask with its bit index (exact_log2)
;; because the instruction takes an element number as its immediate.  A
;; memory source is printed as %2; a register source is printed with the
;; %k modifier.  prefix_extra depends on whether operand 0 is V8HI.
6916 (define_insn "*avx_pinsr<ssevecsize>"
6917 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6918 (vec_merge:SSEMODE124
6919 (vec_duplicate:SSEMODE124
6920 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6921 (match_operand:SSEMODE124 1 "register_operand" "x")
6922 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6925 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6926 if (MEM_P (operands[2]))
6927 return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6929 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6931 [(set_attr "type" "sselog")
6932 (set (attr "prefix_extra")
6933 (if_then_else (match_operand:V8HI 0 "" "")
6935 (const_string "1")))
6936 (set_attr "length_immediate" "1")
6937 (set_attr "prefix" "vex")
6938 (set_attr "mode" "TI")])
;; Two-operand byte insert into a V16QI register.  The QI scalar in
;; operand 2 (general register or memory) is broadcast and merged into
;; operand 1, which is tied to the destination, under the single-bit mask
;; in operand 3.  The output code converts that mask to a bit index with
;; exact_log2 before printing pinsrb; a register source is printed with
;; the %k modifier, a memory source as-is.
6940 (define_insn "*sse4_1_pinsrb"
6941 [(set (match_operand:V16QI 0 "register_operand" "=x")
6943 (vec_duplicate:V16QI
6944 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6945 (match_operand:V16QI 1 "register_operand" "0")
6946 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6949 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6950 if (MEM_P (operands[2]))
6951 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
6953 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6955 [(set_attr "type" "sselog")
6956 (set_attr "prefix_extra" "1")
6957 (set_attr "length_immediate" "1")
6958 (set_attr "mode" "TI")])
;; Two-operand halfword insert into a V8HI register.  Operand 2 is the HI
;; scalar (general register or memory), operand 1 is the vector tied to
;; the destination, and operand 3 is a single-bit element mask.  The output
;; code converts the mask to a bit index with exact_log2 before printing
;; pinsrw; a register source is printed with the %k modifier, a memory
;; source as-is.
6960 (define_insn "*sse2_pinsrw"
6961 [(set (match_operand:V8HI 0 "register_operand" "=x")
6964 (match_operand:HI 2 "nonimmediate_operand" "rm"))
6965 (match_operand:V8HI 1 "register_operand" "0")
6966 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
6969 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6970 if (MEM_P (operands[2]))
6971 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
6973 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6975 [(set_attr "type" "sselog")
6976 (set_attr "prefix_data16" "1")
6977 (set_attr "length_immediate" "1")
6978 (set_attr "mode" "TI")])
6980 ;; It must come before sse2_loadld since it is preferred.
;; Two-operand dword insert into a V4SI register.  Operand 2 is the SI
;; scalar (general register or memory), operand 1 is the vector tied to
;; the destination, and operand 3 is a single-bit element mask that the
;; output code converts to a bit index with exact_log2 before printing
;; pinsrd.
6981 (define_insn "*sse4_1_pinsrd"
6982 [(set (match_operand:V4SI 0 "register_operand" "=x")
6985 (match_operand:SI 2 "nonimmediate_operand" "rm"))
6986 (match_operand:V4SI 1 "register_operand" "0")
6987 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
6990 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6991 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6993 [(set_attr "type" "sselog")
6994 (set_attr "prefix_extra" "1")
6995 (set_attr "length_immediate" "1")
6996 (set_attr "mode" "TI")])
;; VEX-encoded qword insert into a V2DI register; 64-bit AVX targets only.
;; Operand 2 is the DI scalar (general register or memory), operand 1 is
;; the source vector register and operand 3 is a single-bit element mask
;; that the output code converts to a bit index with exact_log2 before
;; printing the three-operand vpinsrq.
6998 (define_insn "*avx_pinsrq"
6999 [(set (match_operand:V2DI 0 "register_operand" "=x")
7002 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7003 (match_operand:V2DI 1 "register_operand" "x")
7004 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7005 "TARGET_AVX && TARGET_64BIT"
7007 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7008 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7010 [(set_attr "type" "sselog")
7011 (set_attr "prefix_extra" "1")
7012 (set_attr "length_immediate" "1")
7013 (set_attr "prefix" "vex")
7014 (set_attr "mode" "TI")])
;; Two-operand qword insert into a V2DI register; 64-bit SSE4.1 targets
;; only.  Operand 2 is the DI scalar (general register or memory), operand
;; 1 is the vector tied to the destination, and operand 3 is a single-bit
;; element mask that the output code converts to a bit index with
;; exact_log2 before printing pinsrq.  The prefix_rex attribute is set.
7016 (define_insn "*sse4_1_pinsrq"
7017 [(set (match_operand:V2DI 0 "register_operand" "=x")
7020 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7021 (match_operand:V2DI 1 "register_operand" "0")
7022 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7023 "TARGET_SSE4_1 && TARGET_64BIT"
7025 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7026 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
7028 [(set_attr "type" "sselog")
7029 (set_attr "prefix_rex" "1")
7030 (set_attr "prefix_extra" "1")
7031 (set_attr "length_immediate" "1")
7032 (set_attr "mode" "TI")])
;; Byte extract from a V16QI register into a general register, for the
;; SWI48 destination modes.  Operand 2 is the constant element index
;; (predicate const_0_to_15_operand).  The destination is printed with the
;; %k modifier, and %v selects the VEX mnemonic when applicable (prefix
;; attribute maybe_vex).
7034 (define_insn "*sse4_1_pextrb_<mode>"
7035 [(set (match_operand:SWI48 0 "register_operand" "=r")
7038 (match_operand:V16QI 1 "register_operand" "x")
7039 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7041 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
7042 [(set_attr "type" "sselog")
7043 (set_attr "prefix_extra" "1")
7044 (set_attr "length_immediate" "1")
7045 (set_attr "prefix" "maybe_vex")
7046 (set_attr "mode" "TI")])
;; Byte extract from a V16QI register stored directly to a QImode memory
;; destination.  Operand 2 is the constant element index (predicate
;; const_0_to_15_operand).  Emits pextrb, or its VEX form via %v.
7048 (define_insn "*sse4_1_pextrb_memory"
7049 [(set (match_operand:QI 0 "memory_operand" "=m")
7051 (match_operand:V16QI 1 "register_operand" "x")
7052 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7054 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7055 [(set_attr "type" "sselog")
7056 (set_attr "prefix_extra" "1")
7057 (set_attr "length_immediate" "1")
7058 (set_attr "prefix" "maybe_vex")
7059 (set_attr "mode" "TI")])
;; Halfword extract from a V8HI register into a general register, for the
;; SWI48 destination modes.  Operand 2 is the constant element index
;; (predicate const_0_to_7_operand).  The destination is printed with the
;; %k modifier; %v selects the VEX mnemonic when applicable.
7061 (define_insn "*sse2_pextrw_<mode>"
7062 [(set (match_operand:SWI48 0 "register_operand" "=r")
7065 (match_operand:V8HI 1 "register_operand" "x")
7066 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7068 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7069 [(set_attr "type" "sselog")
7070 (set_attr "prefix_data16" "1")
7071 (set_attr "length_immediate" "1")
7072 (set_attr "prefix" "maybe_vex")
7073 (set_attr "mode" "TI")])
;; Halfword extract from a V8HI register stored directly to an HImode
;; memory destination.  Operand 2 is the constant element index (predicate
;; const_0_to_7_operand).  Emits pextrw, or its VEX form via %v.
7075 (define_insn "*sse4_1_pextrw_memory"
7076 [(set (match_operand:HI 0 "memory_operand" "=m")
7078 (match_operand:V8HI 1 "register_operand" "x")
7079 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7081 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7082 [(set_attr "type" "sselog")
7083 (set_attr "prefix_extra" "1")
7084 (set_attr "length_immediate" "1")
7085 (set_attr "prefix" "maybe_vex")
7086 (set_attr "mode" "TI")])
;; Dword extract from a V4SI register into an SImode general register or
;; memory destination ("=rm").  Operand 2 is the constant element index
;; (predicate const_0_to_3_operand).  Emits pextrd, or its VEX form via %v.
7088 (define_insn "*sse4_1_pextrd"
7089 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7091 (match_operand:V4SI 1 "register_operand" "x")
7092 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7094 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7095 [(set_attr "type" "sselog")
7096 (set_attr "prefix_extra" "1")
7097 (set_attr "length_immediate" "1")
7098 (set_attr "prefix" "maybe_vex")
7099 (set_attr "mode" "TI")])
;; Dword extract from a V4SI register whose result lands in a DImode
;; general register; 64-bit SSE4.1 targets only.  The template prints the
;; destination with the %k modifier, so the instruction itself is the same
;; pextrd (or VEX form via %v) used for the SImode extract.
7101 (define_insn "*sse4_1_pextrd_zext"
7102 [(set (match_operand:DI 0 "register_operand" "=r")
7105 (match_operand:V4SI 1 "register_operand" "x")
7106 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7107 "TARGET_64BIT && TARGET_SSE4_1"
7108 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7109 [(set_attr "type" "sselog")
7110 (set_attr "prefix_extra" "1")
7111 (set_attr "length_immediate" "1")
7112 (set_attr "prefix" "maybe_vex")
7113 (set_attr "mode" "TI")])
7115 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Qword extract from a V2DI register into a DImode general register or
;; memory destination ("=rm"); 64-bit SSE4.1 targets only.  Operand 2 is
;; the constant element index (predicate const_0_to_1_operand).  Emits
;; pextrq, or its VEX form via %v; the prefix_rex attribute is set.
7116 (define_insn "*sse4_1_pextrq"
7117 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7119 (match_operand:V2DI 1 "register_operand" "x")
7120 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7121 "TARGET_SSE4_1 && TARGET_64BIT"
7122 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7123 [(set_attr "type" "sselog")
7124 (set_attr "prefix_rex" "1")
7125 (set_attr "prefix_extra" "1")
7126 (set_attr "length_immediate" "1")
7127 (set_attr "prefix" "maybe_vex")
7128 (set_attr "mode" "TI")])
;; Expander taking a V4SI destination, a V4SI source and an integer
;; shuffle immediate.  The immediate is split into four 2-bit fields (bits
;; 0-1, 2-3, 4-5, 6-7), which become the four separate selector operands
;; of sse2_pshufd_1.
7130 (define_expand "sse2_pshufd"
7131 [(match_operand:V4SI 0 "register_operand" "")
7132 (match_operand:V4SI 1 "nonimmediate_operand" "")
7133 (match_operand:SI 2 "const_int_operand" "")]
7136 int mask = INTVAL (operands[2]);
7137 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7138 GEN_INT ((mask >> 0) & 3),
7139 GEN_INT ((mask >> 2) & 3),
7140 GEN_INT ((mask >> 4) & 3),
7141 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle whose four element selectors are separate operands 2..5
;; (each matched by const_0_to_3_operand).  The output code packs them
;; back into a single immediate, two bits per selector with operand 2 in
;; the lowest bits, stores it in operands[2] and prints pshufd (or its VEX
;; form via %v).  The source may be a register or memory.
7145 (define_insn "sse2_pshufd_1"
7146 [(set (match_operand:V4SI 0 "register_operand" "=x")
7148 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7149 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7150 (match_operand 3 "const_0_to_3_operand" "")
7151 (match_operand 4 "const_0_to_3_operand" "")
7152 (match_operand 5 "const_0_to_3_operand" "")])))]
7156 mask |= INTVAL (operands[2]) << 0;
7157 mask |= INTVAL (operands[3]) << 2;
7158 mask |= INTVAL (operands[4]) << 4;
7159 mask |= INTVAL (operands[5]) << 6;
7160 operands[2] = GEN_INT (mask);
7162 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7164 [(set_attr "type" "sselog1")
7165 (set_attr "prefix_data16" "1")
7166 (set_attr "prefix" "maybe_vex")
7167 (set_attr "length_immediate" "1")
7168 (set_attr "mode" "TI")])
;; Expander taking a V8HI destination, a V8HI source and an integer
;; shuffle immediate.  The immediate is split into four 2-bit fields (bits
;; 0-1, 2-3, 4-5, 6-7), which become the four separate selector operands
;; of sse2_pshuflw_1.
7170 (define_expand "sse2_pshuflw"
7171 [(match_operand:V8HI 0 "register_operand" "")
7172 (match_operand:V8HI 1 "nonimmediate_operand" "")
7173 (match_operand:SI 2 "const_int_operand" "")]
7176 int mask = INTVAL (operands[2]);
7177 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7178 GEN_INT ((mask >> 0) & 3),
7179 GEN_INT ((mask >> 2) & 3),
7180 GEN_INT ((mask >> 4) & 3),
7181 GEN_INT ((mask >> 6) & 3)));
;; V8HI shuffle whose first four element selectors are separate operands
;; 2..5 (each matched by const_0_to_3_operand).  The output code packs them
;; back into a single immediate, two bits per selector with operand 2 in
;; the lowest bits, stores it in operands[2] and prints pshuflw (or its VEX
;; form via %v).  Attributes: prefix_rep set, prefix_data16 cleared.
7185 (define_insn "sse2_pshuflw_1"
7186 [(set (match_operand:V8HI 0 "register_operand" "=x")
7188 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7189 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7190 (match_operand 3 "const_0_to_3_operand" "")
7191 (match_operand 4 "const_0_to_3_operand" "")
7192 (match_operand 5 "const_0_to_3_operand" "")
7200 mask |= INTVAL (operands[2]) << 0;
7201 mask |= INTVAL (operands[3]) << 2;
7202 mask |= INTVAL (operands[4]) << 4;
7203 mask |= INTVAL (operands[5]) << 6;
7204 operands[2] = GEN_INT (mask);
7206 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7208 [(set_attr "type" "sselog")
7209 (set_attr "prefix_data16" "0")
7210 (set_attr "prefix_rep" "1")
7211 (set_attr "prefix" "maybe_vex")
7212 (set_attr "length_immediate" "1")
7213 (set_attr "mode" "TI")])
;; Expander taking a V8HI destination, a V8HI source and an integer
;; shuffle immediate.  The immediate is split into four 2-bit fields and 4
;; is added to each, so the selectors handed to sse2_pshufhw_1 fall in the
;; range 4..7 that its const_4_to_7_operand predicates require.
7215 (define_expand "sse2_pshufhw"
7216 [(match_operand:V8HI 0 "register_operand" "")
7217 (match_operand:V8HI 1 "nonimmediate_operand" "")
7218 (match_operand:SI 2 "const_int_operand" "")]
7221 int mask = INTVAL (operands[2]);
7222 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7223 GEN_INT (((mask >> 0) & 3) + 4),
7224 GEN_INT (((mask >> 2) & 3) + 4),
7225 GEN_INT (((mask >> 4) & 3) + 4),
7226 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI shuffle whose selection list starts with fixed indices and ends
;; with four selector operands 2..5, each matched by const_4_to_7_operand.
;; The output code subtracts 4 from each selector, packs the results into a
;; single immediate (two bits each, operand 2 in the lowest bits), stores
;; it in operands[2] and prints pshufhw (or its VEX form via %v).
;; Attributes: prefix_rep set, prefix_data16 cleared.
7230 (define_insn "sse2_pshufhw_1"
7231 [(set (match_operand:V8HI 0 "register_operand" "=x")
7233 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7234 (parallel [(const_int 0)
7238 (match_operand 2 "const_4_to_7_operand" "")
7239 (match_operand 3 "const_4_to_7_operand" "")
7240 (match_operand 4 "const_4_to_7_operand" "")
7241 (match_operand 5 "const_4_to_7_operand" "")])))]
7245 mask |= (INTVAL (operands[2]) - 4) << 0;
7246 mask |= (INTVAL (operands[3]) - 4) << 2;
7247 mask |= (INTVAL (operands[4]) - 4) << 4;
7248 mask |= (INTVAL (operands[5]) - 4) << 6;
7249 operands[2] = GEN_INT (mask);
7251 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7253 [(set_attr "type" "sselog")
7254 (set_attr "prefix_rep" "1")
7255 (set_attr "prefix_data16" "0")
7256 (set_attr "prefix" "maybe_vex")
7257 (set_attr "length_immediate" "1")
7258 (set_attr "mode" "TI")])
;; Expander that loads an SImode value (operand 1, register or memory) into
;; a V4SI register.  The preparation statement sets operands[2] to the
;; V4SI zero constant.
7260 (define_expand "sse2_loadd"
7261 [(set (match_operand:V4SI 0 "register_operand" "")
7264 (match_operand:SI 1 "nonimmediate_operand" ""))
7268 "operands[2] = CONST0_RTX (V4SImode);")
;; VEX-encoded load of an SImode value (operand 2) into a V4SI register,
;; combined with operand 1, which is either the zero constant ("C") or a
;; register.  Three alternatives:
;;   0: scalar in memory, operand 1 zero           -> vmovd
;;   1: scalar in a general register, operand 1 zero, destination "Yi"
;;                                                 -> vmovd
;;   2: scalar in an SSE register, operand 1 an SSE register
;;                                                 -> three-operand vmovss
7270 (define_insn "*avx_loadld"
7271 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7274 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7275 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7279 vmovd\t{%2, %0|%0, %2}
7280 vmovd\t{%2, %0|%0, %2}
7281 vmovss\t{%2, %1, %0|%0, %1, %2}"
7282 [(set_attr "type" "ssemov")
7283 (set_attr "prefix" "vex")
7284 (set_attr "mode" "TI,TI,V4SF")])
;; Load of an SImode value (operand 2) into a V4SI register, combined with
;; operand 1, which is either the zero constant ("C") or a register tied to
;; the destination.  Four alternatives:
;;   0: scalar in memory, operand 1 zero, destination "Y2"         -> movd
;;   1: scalar in a general register, operand 1 zero, dest. "Yi"   -> movd
;;   2: scalar in memory, operand 1 zero                           -> movss
;;   3: scalar in an SSE register, operand 1 tied to destination   -> movss
7286 (define_insn "sse2_loadld"
7287 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7290 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7291 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7295 movd\t{%2, %0|%0, %2}
7296 movd\t{%2, %0|%0, %2}
7297 movss\t{%2, %0|%0, %2}
7298 movss\t{%2, %0|%0, %2}"
7299 [(set_attr "type" "ssemov")
7300 (set_attr "mode" "TI,TI,V4SF,SF")])
;; Extract element 0 of a V4SI register into an SImode destination
;; (memory or SSE register in alternative 0, general register in
;; alternative 1).  After reload the insn is split into a plain SImode
;; move, provided inter-unit moves are allowed, or the destination is
;; memory, or the destination is not a general register.  The split
;; re-creates operand 1 as an SImode register with the same register
;; number.
7302 (define_insn_and_split "sse2_stored"
7303 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7305 (match_operand:V4SI 1 "register_operand" "x,Yi")
7306 (parallel [(const_int 0)])))]
7309 "&& reload_completed
7310 && (TARGET_INTER_UNIT_MOVES
7311 || MEM_P (operands [0])
7312 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7313 [(set (match_dup 0) (match_dup 1))]
7314 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element operand 2 (constant, predicate const_0_to_3_operand)
;; from a V4SI value in offsettable memory ("o") into a general register.
;; The split code turns this into an ordinary SImode load from the source
;; address plus 4 * index bytes, built with adjust_address.
7316 (define_insn_and_split "*vec_ext_v4si_mem"
7317 [(set (match_operand:SI 0 "register_operand" "=r")
7319 (match_operand:V4SI 1 "memory_operand" "o")
7320 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7326 int i = INTVAL (operands[2]);
7328 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Named expander that stores element 0 of a V2DI register into a DImode
;; destination (register or memory).
7332 (define_expand "sse_storeq"
7333 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7335 (match_operand:V2DI 1 "register_operand" "")
7336 (parallel [(const_int 0)])))]
;; 64-bit-only extraction of element 0 of a V2DI operand into a DImode
;; destination; rejected when both operands are memory.  Three
;; alternatives: SSE register to memory/SSE register, "Yi" register to
;; general register, and offsettable memory to general register.  Only the
;; last alternative has concrete attributes (type imov, mode DI) and is
;; emitted as mov{q} (or the form selected by %v) straight from memory.
7339 (define_insn "*sse2_storeq_rex64"
7340 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7342 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7343 (parallel [(const_int 0)])))]
7344 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7348 %vmov{q}\t{%1, %0|%0, %1}"
7349 [(set_attr "type" "*,*,imov")
7350 (set_attr "prefix" "*,*,maybe_vex")
7351 (set_attr "mode" "*,*,DI")])
;; Extraction of element 0 of a V2DI register into a DImode memory or SSE
;; register destination, followed by the split that implements it.  The
;; split applies when inter-unit moves are allowed, or the destination is
;; memory, or the destination is not a general register; it rewrites the
;; extraction as a plain DImode move after re-creating operand 1 as a
;; DImode register with the same register number.
7353 (define_insn "*sse2_storeq"
7354 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7356 (match_operand:V2DI 1 "register_operand" "x")
7357 (parallel [(const_int 0)])))]
7362 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7364 (match_operand:V2DI 1 "register_operand" "")
7365 (parallel [(const_int 0)])))]
7368 && (TARGET_INTER_UNIT_MOVES
7369 || MEM_P (operands [0])
7370 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7371 [(set (match_dup 0) (match_dup 1))]
7372 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; VEX-encoded extraction of element 1 of a V2DI operand into a DImode
;; destination; rejected when both operands are memory.  Alternatives:
;;   0: SSE register -> memory            vmovhps
;;   1: SSE register -> SSE register      vpsrldq by 8 bytes
;;   2: offsettable memory -> SSE reg     vmovq from %H1
;;   3: offsettable memory -> general reg mov{q} from %H1
7374 (define_insn "*vec_extractv2di_1_rex64_avx"
7375 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7377 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7378 (parallel [(const_int 1)])))]
7381 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7383 vmovhps\t{%1, %0|%0, %1}
7384 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7385 vmovq\t{%H1, %0|%0, %H1}
7386 vmov{q}\t{%H1, %0|%0, %H1}"
7387 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7388 (set_attr "length_immediate" "*,1,*,*")
7389 (set_attr "memory" "*,none,*,*")
7390 (set_attr "prefix" "vex")
7391 (set_attr "mode" "V2SF,TI,TI,DI")])
;; 64-bit, non-VEX: extract element 1 of a V2DI value into a DImode
;; destination.  Alternatives:
;;   0: movhps  SSE register -> memory
;;   1: psrldq  in place; operand 1 is tied to operand 0 ("0") because the
;;              template only names %0
;;   2: movq    offsettable memory, addressed as %H1 -> SSE register
;;   3: mov{q}  offsettable memory, addressed as %H1 -> general register
;; The insn condition rejects memory-to-memory.
7393 (define_insn "*vec_extractv2di_1_rex64"
7394 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7396 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7397 (parallel [(const_int 1)])))]
7398 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7400 movhps\t{%1, %0|%0, %1}
7401 psrldq\t{$8, %0|%0, 8}
7402 movq\t{%H1, %0|%0, %H1}
7403 mov{q}\t{%H1, %0|%0, %H1}"
7404 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7405 (set_attr "length_immediate" "*,1,*,*")
7406 (set_attr "memory" "*,none,*,*")
7407 (set_attr "mode" "V2SF,TI,TI,DI")])
;; AVX variant without the general-register alternative: extract element 1
;; of a V2DI value.  Alternatives:
;;   0: vmovhps  SSE register -> memory
;;   1: vpsrldq  SSE register -> SSE register, shifting right by 8 bytes
;;   2: vmovq    offsettable memory, addressed as %H1 -> SSE register
;; The visible part of the condition rejects memory-to-memory.
7409 (define_insn "*vec_extractv2di_1_avx"
7410 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7412 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7413 (parallel [(const_int 1)])))]
7416 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7418 vmovhps\t{%1, %0|%0, %1}
7419 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7420 vmovq\t{%H1, %0|%0, %H1}"
7421 [(set_attr "type" "ssemov,sseishft1,ssemov")
7422 (set_attr "length_immediate" "*,1,*")
7423 (set_attr "memory" "*,none,*")
7424 (set_attr "prefix" "vex")
7425 (set_attr "mode" "V2SF,TI,TI")])
;; SSE2 variant: extract element 1 of a V2DI value.  Alternatives:
;;   0: movhps  SSE register -> memory
;;   1: psrldq  in place; operand 1 is tied to operand 0 ("0")
;;   2: movq    offsettable memory, addressed as %H1 -> SSE register
;; The visible part of the condition requires TARGET_SSE2 and rejects
;; memory-to-memory.
7427 (define_insn "*vec_extractv2di_1_sse2"
7428 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7430 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7431 (parallel [(const_int 1)])))]
7433 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7435 movhps\t{%1, %0|%0, %1}
7436 psrldq\t{$8, %0|%0, 8}
7437 movq\t{%H1, %0|%0, %H1}"
7438 [(set_attr "type" "ssemov,sseishft1,ssemov")
7439 (set_attr "length_immediate" "*,1,*")
7440 (set_attr "memory" "*,none,*")
7441 (set_attr "mode" "V2SF,TI,TI")])
7443 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE-only fallback (enabled when SSE2 is not available): extract element 1
;; of a V2DI value using single-precision move instructions.  Alternatives:
;;   0: movhps   SSE register -> memory
;;   1: movhlps  SSE register -> SSE register
;;   2: movlps   offsettable memory, addressed as %H1 -> SSE register
;; The insn condition rejects memory-to-memory.
7444 (define_insn "*vec_extractv2di_1_sse"
7445 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7447 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7448 (parallel [(const_int 1)])))]
7449 "!TARGET_SSE2 && TARGET_SSE
7450 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7452 movhps\t{%1, %0|%0, %1}
7453 movhlps\t{%1, %0|%0, %1}
7454 movlps\t{%H1, %0|%0, %H1}"
7455 [(set_attr "type" "ssemov")
7456 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX: duplicate an SImode value into a V4SI register.  Alternatives:
;;   0: vpshufd with immediate 0, source in an SSE register
;;   1: vbroadcastss, source in memory
;; NOTE(review): operand 1 uses the "register_operand" predicate although
;; alternative 1 carries an "m" constraint -- confirm this is intentional.
7458 (define_insn "*vec_dupv4si_avx"
7459 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7461 (match_operand:SI 1 "register_operand" "x,m")))]
7464 vpshufd\t{$0, %1, %0|%0, %1, 0}
7465 vbroadcastss\t{%1, %0|%0, %1}"
7466 [(set_attr "type" "sselog1,ssemov")
7467 (set_attr "length_immediate" "1,0")
7468 (set_attr "prefix_extra" "0,1")
7469 (set_attr "prefix" "vex")
7470 (set_attr "mode" "TI,V4SF")])
;; Duplicate an SImode register value into a V4SI register.  Alternatives:
;;   0: %vpshufd with immediate 0 ("Y2" registers)
;;   1: shufps with immediate 0, operating in place on %0 (operand 1 is
;;      tied to operand 0)
7472 (define_insn "*vec_dupv4si"
7473 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7475 (match_operand:SI 1 "register_operand" " Y2,0")))]
7478 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7479 shufps\t{$0, %0, %0|%0, %0, 0}"
7480 [(set_attr "type" "sselog1")
7481 (set_attr "length_immediate" "1")
7482 (set_attr "mode" "TI,V4SF")])
;; AVX: duplicate a DImode value into a V2DI register.  Alternatives:
;;   0: vpunpcklqdq with the same SSE register as both sources
;;   1: vmovddup from memory
7484 (define_insn "*vec_dupv2di_avx"
7485 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7487 (match_operand:DI 1 "nonimmediate_operand" " x,m")))]
7490 vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}
7491 vmovddup\t{%1, %0|%0, %1}"
7492 [(set_attr "type" "sselog1")
7493 (set_attr "prefix" "vex")
7494 (set_attr "mode" "TI,DF")])
;; SSE3 variant (per its name) of the V2DI duplicate.  Two alternatives:
;; operand 1 tied to operand 0, or operand 1 in memory.  Only the movddup
;; template (the last one) is visible in this excerpt; the template for the
;; tied-register alternative is not shown here.
7496 (define_insn "*vec_dupv2di_sse3"
7497 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7499 (match_operand:DI 1 "nonimmediate_operand" " 0,m")))]
7503 movddup\t{%1, %0|%0, %1}"
7504 [(set_attr "type" "sselog1")
7505 (set_attr "mode" "TI,DF")])
;; Generic V2DI duplicate from a DImode register.  In both alternatives
;; operand 1 is tied to operand 0; they differ in register class ("Y2" vs
;; "x") and in type/mode (sselog1/TI vs ssemov/V4SF).  The condition and
;; output templates are not visible in this excerpt.
7507 (define_insn "*vec_dupv2di"
7508 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7510 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7515 [(set_attr "type" "sselog1,ssemov")
7516 (set_attr "mode" "TI,V4SF")])
;; AVX: build a V2SI value from two SImode operands.  Alternatives:
;;   0: vpinsrd     inserts operand 2 (reg/mem) at index 1 of operand 1
;;   1: vpunpckldq  both halves in SSE registers
;;   2: vmovd       operand 2 matches "C"; only operand 1 is moved
;;   3: punpckldq   MMX registers ("*y"), operand 1 tied to operand 0
;;   4: movd        MMX destination, operand 2 matches "C"
;; The two MMX alternatives (3 and 4) use the "orig" prefix; the others are
;; VEX-encoded.
7518 (define_insn "*vec_concatv2si_avx"
7519 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7521 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7522 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7525 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7526 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7527 vmovd\t{%1, %0|%0, %1}
7528 punpckldq\t{%2, %0|%0, %2}
7529 movd\t{%1, %0|%0, %1}"
7530 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7531 (set_attr "prefix_extra" "1,*,*,*,*")
7532 (set_attr "length_immediate" "1,*,*,*,*")
7533 (set (attr "prefix")
7534 (if_then_else (eq_attr "alternative" "3,4")
7535 (const_string "orig")
7536 (const_string "vex")))
7537 (set_attr "mode" "TI,TI,TI,DI,DI")])
;; SSE4.1 variant (per its name): build a V2SI value from two SImode
;; operands using two-operand encodings.  Alternatives:
;;   0: pinsrd     operand 1 tied to operand 0, operand 2 (reg/mem) inserted
;;                 at index 1
;;   1: punpckldq  operand 1 tied to operand 0, operand 2 in an SSE register
;;   2: movd       operand 2 matches "C"; only operand 1 is moved
;;   3: punpckldq  MMX registers ("*y")
;;   4: movd       MMX destination, operand 2 matches "C"
7539 (define_insn "*vec_concatv2si_sse4_1"
7540 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7542 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7543 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7546 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7547 punpckldq\t{%2, %0|%0, %2}
7548 movd\t{%1, %0|%0, %1}
7549 punpckldq\t{%2, %0|%0, %2}
7550 movd\t{%1, %0|%0, %1}"
7551 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7552 (set_attr "prefix_extra" "1,*,*,*,*")
7553 (set_attr "length_immediate" "1,*,*,*,*")
7554 (set_attr "mode" "TI,TI,TI,DI,DI")])
7556 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7557 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7558 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; SSE2 variant (per its name): build a V2SI value from two SImode operands.
;; Operand 2 is restricted to a register or zero ("reg_or_0_operand").
;; Alternatives:
;;   0: punpckldq  SSE registers, operand 1 tied to operand 0
;;   1: movd       into an SSE register, operand 2 matches "C"
;;   2: punpckldq  MMX registers, operand 1 tied to operand 0
;;   3: movd       into an MMX register, operand 2 matches "C"
7559 (define_insn "*vec_concatv2si_sse2"
7560 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7562 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7563 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7566 punpckldq\t{%2, %0|%0, %2}
7567 movd\t{%1, %0|%0, %1}
7568 punpckldq\t{%2, %0|%0, %2}
7569 movd\t{%1, %0|%0, %1}"
7570 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7571 (set_attr "mode" "TI,TI,DI,DI")])
;; SSE variant (per its name): build a V2SI value from two SImode operands,
;; using single-precision instructions for the SSE-register alternatives.
;;   0: unpcklps   SSE registers, operand 1 tied to operand 0
;;   1: movss      from memory into an SSE register, operand 2 matches "C"
;;   2: punpckldq  MMX registers, operand 1 tied to operand 0
;;   3: movd       into an MMX register, operand 2 matches "C"
7573 (define_insn "*vec_concatv2si_sse"
7574 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7576 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7577 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7580 unpcklps\t{%2, %0|%0, %2}
7581 movss\t{%1, %0|%0, %1}
7582 punpckldq\t{%2, %0|%0, %2}
7583 movd\t{%1, %0|%0, %1}"
7584 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7585 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; AVX: build a V4SI value from two V2SI halves.  Alternatives:
;;   0: vpunpcklqdq  second half in an SSE register
;;   1: vmovhps      second half in memory
7587 (define_insn "*vec_concatv4si_1_avx"
7588 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7590 (match_operand:V2SI 1 "register_operand" " x,x")
7591 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7594 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7595 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7596 [(set_attr "type" "sselog,ssemov")
7597 (set_attr "prefix" "vex")
7598 (set_attr "mode" "TI,V2SF")])
;; Build a V4SI value from two V2SI halves with two-operand encodings;
;; operand 1 is tied to operand 0 in every alternative.
;;   0: punpcklqdq  second half in a "Y2" register
;;   1: movlhps     second half in an SSE register
;;   2: movhps      second half in memory
7600 (define_insn "*vec_concatv4si_1"
7601 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7603 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7604 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7607 punpcklqdq\t{%2, %0|%0, %2}
7608 movlhps\t{%2, %0|%0, %2}
7609 movhps\t{%2, %0|%0, %2}"
7610 [(set_attr "type" "sselog,ssemov,ssemov")
7611 (set_attr "mode" "TI,V4SF,V2SF")])
;; AVX, 32-bit only: build a V2DI value from two DImode operands.
;;   0: vmovq        operand 1 in memory, operand 2 matches "C"
;;   1: movq2dq      operand 1 in an MMX register ("*y"), operand 2 matches
;;                   "C"; this is the only alternative with prefix "orig"
;;   2: vpunpcklqdq  both operands in SSE registers
;;   3: vmovhps      operand 2 in memory
7613 (define_insn "*vec_concatv2di_avx"
7614 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7616 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7617 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7618 "!TARGET_64BIT && TARGET_AVX"
7620 vmovq\t{%1, %0|%0, %1}
7621 movq2dq\t{%1, %0|%0, %1}
7622 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7623 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7624 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7625 (set (attr "prefix")
7626 (if_then_else (eq_attr "alternative" "1")
7627 (const_string "orig")
7628 (const_string "vex")))
7629 (set_attr "mode" "TI,TI,TI,V2SF")])
;; SSE, 32-bit only: build a V2DI value from two DImode operands.
;;   0: movq        operand 1 in memory or a "Y2" register, operand 2 "C"
;;   1: movq2dq     operand 1 in an MMX register ("*y"), operand 2 "C"
;;   2: punpcklqdq  operand 1 tied to operand 0, operand 2 in a "Y2" register
;;   3: movlhps     operand 1 tied to operand 0, operand 2 in an SSE register
;;   4: movhps      operand 1 tied to operand 0, operand 2 in memory
7631 (define_insn "vec_concatv2di"
7632 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7634 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7635 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7636 "!TARGET_64BIT && TARGET_SSE"
7638 movq\t{%1, %0|%0, %1}
7639 movq2dq\t{%1, %0|%0, %1}
7640 punpcklqdq\t{%2, %0|%0, %2}
7641 movlhps\t{%2, %0|%0, %2}
7642 movhps\t{%2, %0|%0, %2}"
7643 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7644 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
;; AVX, 64-bit: build a V2DI value from two DImode operands.
;;   0: vpinsrq      operand 2 (general reg/mem) inserted at index 1
;;   1: vmovq        operand 1 in memory, operand 2 matches "C"
;;   2: vmovq        operand 1 in a general register, destination "Yi"
;;   3: movq2dq      operand 1 in an MMX register; the only alternative
;;                   with prefix "orig"
;;   4: vpunpcklqdq  both operands in SSE registers
;;   5: vmovhps      operand 2 in memory
7646 (define_insn "*vec_concatv2di_rex64_avx"
7647 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7649 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7650 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7651 "TARGET_64BIT && TARGET_AVX"
7653 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7654 vmovq\t{%1, %0|%0, %1}
7655 vmovq\t{%1, %0|%0, %1}
7656 movq2dq\t{%1, %0|%0, %1}
7657 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7658 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7659 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7660 (set_attr "prefix_extra" "1,*,*,*,*,*")
7661 (set_attr "length_immediate" "1,*,*,*,*,*")
7662 (set (attr "prefix")
7663 (if_then_else (eq_attr "alternative" "3")
7664 (const_string "orig")
7665 (const_string "vex")))
7666 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
;; SSE4.1, 64-bit: build a V2DI value from two DImode operands with
;; two-operand encodings.
;;   0: pinsrq      operand 1 tied to operand 0, operand 2 (general reg/mem)
;;                  inserted at index 1
;;   1: movq        operand 1 in memory or an SSE register, operand 2 "C"
;;   2: movq        operand 1 in a general register, destination "Yi"
;;   3: movq2dq     operand 1 in an MMX register
;;   4: punpcklqdq  operand 1 tied, operand 2 in an SSE register
;;   5: movlhps     operand 1 tied, operand 2 in an SSE register
;;   6: movhps      operand 1 tied, operand 2 in memory
;; prefix_rex is forced to 1 for alternatives 0 and 2.
7668 (define_insn "*vec_concatv2di_rex64_sse4_1"
7669 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7671 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7672 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7673 "TARGET_64BIT && TARGET_SSE4_1"
7675 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7676 movq\t{%1, %0|%0, %1}
7677 movq\t{%1, %0|%0, %1}
7678 movq2dq\t{%1, %0|%0, %1}
7679 punpcklqdq\t{%2, %0|%0, %2}
7680 movlhps\t{%2, %0|%0, %2}
7681 movhps\t{%2, %0|%0, %2}"
7682 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7683 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7684 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7685 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7686 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
;; SSE, 64-bit: build a V2DI value from two DImode operands.
;;   0: movq        operand 1 in memory or a "Y2" register, operand 2 "C"
;;   1: movq        operand 1 in a general register, destination "Yi"
;;   2: movq2dq     operand 1 in an MMX register
;;   3: punpcklqdq  operand 1 tied, operand 2 in a "Y2" register
;;   4: movlhps     operand 1 tied, operand 2 in an SSE register
;;   5: movhps      operand 1 tied, operand 2 in memory
;; prefix_rex is forced to 1 for alternative 1 only.
7688 (define_insn "*vec_concatv2di_rex64_sse"
7689 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7691 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7692 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7693 "TARGET_64BIT && TARGET_SSE"
7695 movq\t{%1, %0|%0, %1}
7696 movq\t{%1, %0|%0, %1}
7697 movq2dq\t{%1, %0|%0, %1}
7698 punpcklqdq\t{%2, %0|%0, %2}
7699 movlhps\t{%2, %0|%0, %2}
7700 movhps\t{%2, %0|%0, %2}"
7701 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7702 (set_attr "prefix_rex" "*,1,*,*,*,*")
7703 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; Expander: unpack V16QI into V8HI ("u"/"hi" variant).  Delegates to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags (true, true);
;; the test that chooses between the two helpers is not visible here.
;; NOTE(review): the flags appear to mean (unsigned, high half), matching
;; the "u" and "hi" in the name -- confirm against the helpers.
7705 (define_expand "vec_unpacku_hi_v16qi"
7706 [(match_operand:V8HI 0 "register_operand" "")
7707 (match_operand:V16QI 1 "register_operand" "")]
7711 ix86_expand_sse4_unpack (operands, true, true);
7713 ix86_expand_sse_unpack (operands, true, true);
;; Expander: unpack V16QI into V8HI ("s"/"hi" variant).  Delegates to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, true); the test that chooses between them is not visible here.
;; NOTE(review): the flags appear to mean (unsigned, high half) -- confirm.
7717 (define_expand "vec_unpacks_hi_v16qi"
7718 [(match_operand:V8HI 0 "register_operand" "")
7719 (match_operand:V16QI 1 "register_operand" "")]
7723 ix86_expand_sse4_unpack (operands, false, true);
7725 ix86_expand_sse_unpack (operands, false, true);
;; Expander: unpack V16QI into V8HI ("u"/"lo" variant).  Delegates to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, false); the test that chooses between them is not visible here.
;; NOTE(review): the flags appear to mean (unsigned, high half) -- confirm.
7729 (define_expand "vec_unpacku_lo_v16qi"
7730 [(match_operand:V8HI 0 "register_operand" "")
7731 (match_operand:V16QI 1 "register_operand" "")]
7735 ix86_expand_sse4_unpack (operands, true, false);
7737 ix86_expand_sse_unpack (operands, true, false);
;; Expander: unpack V16QI into V8HI ("s"/"lo" variant).  Delegates to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, false); the test that chooses between them is not visible here.
;; NOTE(review): the flags appear to mean (unsigned, high half) -- confirm.
7741 (define_expand "vec_unpacks_lo_v16qi"
7742 [(match_operand:V8HI 0 "register_operand" "")
7743 (match_operand:V16QI 1 "register_operand" "")]
7747 ix86_expand_sse4_unpack (operands, false, false);
7749 ix86_expand_sse_unpack (operands, false, false);
;; Expander: unpack V8HI into V4SI ("u"/"hi" variant).  Delegates to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags (true, true);
;; the test that chooses between them is not visible here.
;; NOTE(review): the flags appear to mean (unsigned, high half) -- confirm.
7753 (define_expand "vec_unpacku_hi_v8hi"
7754 [(match_operand:V4SI 0 "register_operand" "")
7755 (match_operand:V8HI 1 "register_operand" "")]
7759 ix86_expand_sse4_unpack (operands, true, true);
7761 ix86_expand_sse_unpack (operands, true, true);
;; Expander: unpack V8HI into V4SI ("s"/"hi" variant).  Delegates to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, true); the test that chooses between them is not visible here.
;; NOTE(review): the flags appear to mean (unsigned, high half) -- confirm.
7765 (define_expand "vec_unpacks_hi_v8hi"
7766 [(match_operand:V4SI 0 "register_operand" "")
7767 (match_operand:V8HI 1 "register_operand" "")]
7771 ix86_expand_sse4_unpack (operands, false, true);
7773 ix86_expand_sse_unpack (operands, false, true);
;; Expander: unpack V8HI into V4SI ("u"/"lo" variant).  Delegates to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, false); the test that chooses between them is not visible here.
;; NOTE(review): the flags appear to mean (unsigned, high half) -- confirm.
7777 (define_expand "vec_unpacku_lo_v8hi"
7778 [(match_operand:V4SI 0 "register_operand" "")
7779 (match_operand:V8HI 1 "register_operand" "")]
7783 ix86_expand_sse4_unpack (operands, true, false);
7785 ix86_expand_sse_unpack (operands, true, false);
;; Expander: unpack V8HI into V4SI ("s"/"lo" variant).  Delegates to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, false); the test that chooses between them is not visible here.
;; NOTE(review): the flags appear to mean (unsigned, high half) -- confirm.
7789 (define_expand "vec_unpacks_lo_v8hi"
7790 [(match_operand:V4SI 0 "register_operand" "")
7791 (match_operand:V8HI 1 "register_operand" "")]
7795 ix86_expand_sse4_unpack (operands, false, false);
7797 ix86_expand_sse_unpack (operands, false, false);
;; Expander: unpack V4SI into V2DI ("u"/"hi" variant).  Delegates to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags (true, true);
;; the test that chooses between them is not visible here.
;; NOTE(review): the flags appear to mean (unsigned, high half) -- confirm.
7801 (define_expand "vec_unpacku_hi_v4si"
7802 [(match_operand:V2DI 0 "register_operand" "")
7803 (match_operand:V4SI 1 "register_operand" "")]
7807 ix86_expand_sse4_unpack (operands, true, true);
7809 ix86_expand_sse_unpack (operands, true, true);
;; Expander: unpack V4SI into V2DI ("s"/"hi" variant).  Delegates to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, true); the test that chooses between them is not visible here.
;; NOTE(review): the flags appear to mean (unsigned, high half) -- confirm.
7813 (define_expand "vec_unpacks_hi_v4si"
7814 [(match_operand:V2DI 0 "register_operand" "")
7815 (match_operand:V4SI 1 "register_operand" "")]
7819 ix86_expand_sse4_unpack (operands, false, true);
7821 ix86_expand_sse_unpack (operands, false, true);
;; Expander: unpack V4SI into V2DI ("u"/"lo" variant).  Delegates to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (true, false); the test that chooses between them is not visible here.
;; NOTE(review): the flags appear to mean (unsigned, high half) -- confirm.
7825 (define_expand "vec_unpacku_lo_v4si"
7826 [(match_operand:V2DI 0 "register_operand" "")
7827 (match_operand:V4SI 1 "register_operand" "")]
7831 ix86_expand_sse4_unpack (operands, true, false);
7833 ix86_expand_sse_unpack (operands, true, false);
;; Expander: unpack V4SI into V2DI ("s"/"lo" variant).  Delegates to
;; ix86_expand_sse4_unpack or ix86_expand_sse_unpack with flags
;; (false, false); the test that chooses between them is not visible here.
;; NOTE(review): the flags appear to mean (unsigned, high half) -- confirm.
7837 (define_expand "vec_unpacks_lo_v4si"
7838 [(match_operand:V2DI 0 "register_operand" "")
7839 (match_operand:V4SI 1 "register_operand" "")]
7843 ix86_expand_sse4_unpack (operands, false, false);
7845 ix86_expand_sse_unpack (operands, false, false);
7849 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7853 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI "uavg" operation on operands 1 and 2.  The visible
;; RTL includes a V16QI constant vector of sixteen ones; the surrounding
;; arithmetic wrappers are not visible in this excerpt.  The preparation
;; statement canonicalizes the operands as for a PLUS via
;; ix86_fixup_binary_operands_no_copy.
7855 (define_expand "sse2_uavgv16qi3"
7856 [(set (match_operand:V16QI 0 "register_operand" "")
7862 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7864 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7865 (const_vector:V16QI [(const_int 1) (const_int 1)
7866 (const_int 1) (const_int 1)
7867 (const_int 1) (const_int 1)
7868 (const_int 1) (const_int 1)
7869 (const_int 1) (const_int 1)
7870 (const_int 1) (const_int 1)
7871 (const_int 1) (const_int 1)
7872 (const_int 1) (const_int 1)]))
7875 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; AVX form of the V16QI "uavg" pattern, emitted as three-operand vpavgb.
;; Operand 1 is marked commutative ("%x") and operand 2 may be in memory.
;; The condition reuses the PLUS operand check from
;; ix86_binary_operator_ok.
7877 (define_insn "*avx_uavgv16qi3"
7878 [(set (match_operand:V16QI 0 "register_operand" "=x")
7884 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7886 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7887 (const_vector:V16QI [(const_int 1) (const_int 1)
7888 (const_int 1) (const_int 1)
7889 (const_int 1) (const_int 1)
7890 (const_int 1) (const_int 1)
7891 (const_int 1) (const_int 1)
7892 (const_int 1) (const_int 1)
7893 (const_int 1) (const_int 1)
7894 (const_int 1) (const_int 1)]))
7896 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7897 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7898 [(set_attr "type" "sseiadd")
7899 (set_attr "prefix" "vex")
7900 (set_attr "mode" "TI")])
;; SSE2 form of the V16QI "uavg" pattern, emitted as two-operand pavgb.
;; Operand 1 is commutative and tied to operand 0 ("%0"); operand 2 may be
;; in memory.  The condition reuses the PLUS operand check from
;; ix86_binary_operator_ok.
7902 (define_insn "*sse2_uavgv16qi3"
7903 [(set (match_operand:V16QI 0 "register_operand" "=x")
7909 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7911 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7912 (const_vector:V16QI [(const_int 1) (const_int 1)
7913 (const_int 1) (const_int 1)
7914 (const_int 1) (const_int 1)
7915 (const_int 1) (const_int 1)
7916 (const_int 1) (const_int 1)
7917 (const_int 1) (const_int 1)
7918 (const_int 1) (const_int 1)
7919 (const_int 1) (const_int 1)]))
7921 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7922 "pavgb\t{%2, %0|%0, %2}"
7923 [(set_attr "type" "sseiadd")
7924 (set_attr "prefix_data16" "1")
7925 (set_attr "mode" "TI")])
;; Expander for the V8HI "uavg" operation on operands 1 and 2.  The visible
;; RTL includes a V8HI constant vector of eight ones; the surrounding
;; arithmetic wrappers are not visible in this excerpt.  The preparation
;; statement canonicalizes the operands as for a PLUS via
;; ix86_fixup_binary_operands_no_copy.
7927 (define_expand "sse2_uavgv8hi3"
7928 [(set (match_operand:V8HI 0 "register_operand" "")
7934 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7936 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7937 (const_vector:V8HI [(const_int 1) (const_int 1)
7938 (const_int 1) (const_int 1)
7939 (const_int 1) (const_int 1)
7940 (const_int 1) (const_int 1)]))
7943 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; AVX form of the V8HI "uavg" pattern, emitted as three-operand vpavgw.
;; Operand 1 is marked commutative ("%x") and operand 2 may be in memory.
7945 (define_insn "*avx_uavgv8hi3"
7946 [(set (match_operand:V8HI 0 "register_operand" "=x")
7952 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7954 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7955 (const_vector:V8HI [(const_int 1) (const_int 1)
7956 (const_int 1) (const_int 1)
7957 (const_int 1) (const_int 1)
7958 (const_int 1) (const_int 1)]))
7960 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7961 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7962 [(set_attr "type" "sseiadd")
7963 (set_attr "prefix" "vex")
7964 (set_attr "mode" "TI")])
;; SSE2 form of the V8HI "uavg" pattern, emitted as two-operand pavgw.
;; Operand 1 is commutative and tied to operand 0 ("%0"); operand 2 may be
;; in memory.
7966 (define_insn "*sse2_uavgv8hi3"
7967 [(set (match_operand:V8HI 0 "register_operand" "=x")
7973 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
7975 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7976 (const_vector:V8HI [(const_int 1) (const_int 1)
7977 (const_int 1) (const_int 1)
7978 (const_int 1) (const_int 1)
7979 (const_int 1) (const_int 1)]))
7981 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7982 "pavgw\t{%2, %0|%0, %2}"
7983 [(set_attr "type" "sseiadd")
7984 (set_attr "prefix_data16" "1")
7985 (set_attr "mode" "TI")])
7987 ;; The correct representation for this is absolutely enormous, and
7988 ;; surely not generally useful.
;; AVX vpsadbw, modelled as an unspec taking two V16QI inputs and producing
;; a V2DI result.  Operand 2 may be in memory.
7989 (define_insn "*avx_psadbw"
7990 [(set (match_operand:V2DI 0 "register_operand" "=x")
7991 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
7992 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7995 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7996 [(set_attr "type" "sseiadd")
7997 (set_attr "prefix" "vex")
7998 (set_attr "mode" "TI")])
;; SSE2 psadbw, modelled as an unspec taking two V16QI inputs and producing
;; a V2DI result.  Two-operand form: operand 1 is tied to operand 0 and
;; operand 2 may be in memory.
8000 (define_insn "sse2_psadbw"
8001 [(set (match_operand:V2DI 0 "register_operand" "=x")
8002 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
8003 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8006 "psadbw\t{%2, %0|%0, %2}"
8007 [(set_attr "type" "sseiadd")
8008 (set_attr "atom_unit" "simul")
8009 (set_attr "prefix_data16" "1")
8010 (set_attr "mode" "TI")])
;; 256-bit AVX vmovmskps/vmovmskpd (suffix chosen by <ssemodesuffix>):
;; produces an SImode result in a general register from an AVX256MODEF2P
;; vector register.  Enabled only for 256-bit vector float modes.
8012 (define_insn "avx_movmsk<ssemodesuffix>256"
8013 [(set (match_operand:SI 0 "register_operand" "=r")
8015 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
8017 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
8018 "vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8019 [(set_attr "type" "ssecvt")
8020 (set_attr "prefix" "vex")
8021 (set_attr "mode" "<MODE>")])
;; 128-bit movmskps/movmskpd (suffix chosen by <ssemodesuffix>): produces an
;; SImode result in a general register from an SSEMODEF2P vector register.
;; The %v template prefix together with prefix "maybe_vex" lets the same
;; pattern serve both the legacy and the VEX encoding.
8023 (define_insn "<sse>_movmsk<ssemodesuffix>"
8024 [(set (match_operand:SI 0 "register_operand" "=r")
8026 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
8028 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
8029 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8030 [(set_attr "type" "ssemov")
8031 (set_attr "prefix" "maybe_vex")
8032 (set_attr "mode" "<MODE>")])
;; pmovmskb, modelled as an unspec: produces an SImode result in a general
;; register from a V16QI SSE register.  Legacy or VEX encoding is selected
;; through %v / "maybe_vex".
8034 (define_insn "sse2_pmovmskb"
8035 [(set (match_operand:SI 0 "register_operand" "=r")
8036 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8039 "%vpmovmskb\t{%1, %0|%0, %1}"
8040 [(set_attr "type" "ssemov")
8041 (set_attr "prefix_data16" "1")
8042 (set_attr "prefix" "maybe_vex")
8043 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination written from an
;; unspec whose visible inputs are two V16QI registers.  The remainder of
;; the pattern is not visible in this excerpt.
8045 (define_expand "sse2_maskmovdqu"
8046 [(set (match_operand:V16QI 0 "memory_operand" "")
8047 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8048 (match_operand:V16QI 2 "register_operand" "")
;; 32-bit maskmovdqu.  The destination is the V16QI memory addressed by
;; operand 0, an SImode register forced into the "D" register class.  The
;; unspec takes operands 1 and 2 plus the previous contents of that same
;; memory as inputs.  Only operands 1 and 2 appear in the output template.
8053 (define_insn "*sse2_maskmovdqu"
8054 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
8055 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8056 (match_operand:V16QI 2 "register_operand" "x")
8057 (mem:V16QI (match_dup 0))]
8059 "TARGET_SSE2 && !TARGET_64BIT"
8060 ;; @@@ check ordering of operands in intel/nonintel syntax
8061 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8062 [(set_attr "type" "ssemov")
8063 (set_attr "prefix_data16" "1")
8064 ;; The implicit %edi operand confuses default length_vex computation.
8065 (set_attr "length_vex" "3")
8066 (set_attr "prefix" "maybe_vex")
8067 (set_attr "mode" "TI")])
;; 64-bit maskmovdqu.  Same shape as the 32-bit pattern but the address
;; register (operand 0, class "D") is DImode.  length_vex is computed
;; explicitly: one byte longer when operand 2 is a REX SSE register
;; (REGNO >= FIRST_REX_SSE_REG).
8069 (define_insn "*sse2_maskmovdqu_rex64"
8070 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
8071 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8072 (match_operand:V16QI 2 "register_operand" "x")
8073 (mem:V16QI (match_dup 0))]
8075 "TARGET_SSE2 && TARGET_64BIT"
8076 ;; @@@ check ordering of operands in intel/nonintel syntax
8077 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8078 [(set_attr "type" "ssemov")
8079 (set_attr "prefix_data16" "1")
8080 ;; The implicit %rdi operand confuses default length_vex computation.
8081 (set (attr "length_vex")
8082 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
8083 (set_attr "prefix" "maybe_vex")
8084 (set_attr "mode" "TI")])
;; ldmxcsr: a volatile unspec whose single input is an SImode memory
;; operand.  Marked as a memory load and as touching MXCSR for Atom
;; scheduling (atom_sse_attr "mxcsr").
8086 (define_insn "sse_ldmxcsr"
8087 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8091 [(set_attr "type" "sse")
8092 (set_attr "atom_sse_attr" "mxcsr")
8093 (set_attr "prefix" "maybe_vex")
8094 (set_attr "memory" "load")])
;; stmxcsr: stores the result of the volatile unspec UNSPECV_STMXCSR into an
;; SImode memory operand.  Marked as a memory store and as touching MXCSR
;; for Atom scheduling (atom_sse_attr "mxcsr").
8096 (define_insn "sse_stmxcsr"
8097 [(set (match_operand:SI 0 "memory_operand" "=m")
8098 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8101 [(set_attr "type" "sse")
8102 (set_attr "atom_sse_attr" "mxcsr")
8103 (set_attr "prefix" "maybe_vex")
8104 (set_attr "memory" "store")])
;; Expander for sfence.  The preparation code creates operand 0 as a
;; volatile BLKmode MEM with a scratch address; the pattern sets that MEM
;; from an UNSPEC_SFENCE of itself.  Available with SSE or 3DNow!-A.
8106 (define_expand "sse_sfence"
8108 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8109 "TARGET_SSE || TARGET_3DNOW_A"
8111 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8112 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the sfence RTL: a BLKmode operand set from UNSPEC_SFENCE
;; of itself.  It has no address bytes (length_address 0) and its memory
;; effect is declared "unknown".
8115 (define_insn "*sse_sfence"
8116 [(set (match_operand:BLK 0 "" "")
8117 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8118 "TARGET_SSE || TARGET_3DNOW_A"
8120 [(set_attr "type" "sse")
8121 (set_attr "length_address" "0")
8122 (set_attr "atom_sse_attr" "fence")
8123 (set_attr "memory" "unknown")])
;; clflush: a volatile unspec whose single input is an address operand
;; (constraint "p").  Classified as a fence for Atom scheduling, with
;; "unknown" memory effect.
8125 (define_insn "sse2_clflush"
8126 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8130 [(set_attr "type" "sse")
8131 (set_attr "atom_sse_attr" "fence")
8132 (set_attr "memory" "unknown")])
;; Expander for mfence.  The preparation code creates operand 0 as a
;; volatile BLKmode MEM with a scratch address; the pattern sets that MEM
;; from an UNSPEC_MFENCE of itself.
8134 (define_expand "sse2_mfence"
8136 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8139 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8140 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the mfence RTL: a BLKmode operand set from UNSPEC_MFENCE
;; of itself.  Enabled on any 64-bit target or when SSE2 is available.
8143 (define_insn "*sse2_mfence"
8144 [(set (match_operand:BLK 0 "" "")
8145 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8146 "TARGET_64BIT || TARGET_SSE2"
8148 [(set_attr "type" "sse")
8149 (set_attr "length_address" "0")
8150 (set_attr "atom_sse_attr" "fence")
8151 (set_attr "memory" "unknown")])
;; Expander for lfence.  The preparation code creates operand 0 as a
;; volatile BLKmode MEM with a scratch address; the pattern sets that MEM
;; from an UNSPEC_LFENCE of itself.
8153 (define_expand "sse2_lfence"
8155 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8158 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8159 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the lfence RTL: a BLKmode operand set from UNSPEC_LFENCE
;; of itself.  Unlike the sfence/mfence insns, which use "fence", this one
;; sets atom_sse_attr to "lfence".
8162 (define_insn "*sse2_lfence"
8163 [(set (match_operand:BLK 0 "" "")
8164 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8167 [(set_attr "type" "sse")
8168 (set_attr "length_address" "0")
8169 (set_attr "atom_sse_attr" "lfence")
8170 (set_attr "memory" "unknown")])
;; mwait: a volatile unspec with two SImode register inputs pinned by
;; constraint to the "a" and "c" register classes.  The instruction length
;; is fixed at 3 bytes.
8172 (define_insn "sse3_mwait"
8173 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8174 (match_operand:SI 1 "register_operand" "c")]
8177 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8178 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8179 ;; we only need to set up 32bit registers.
8181 [(set_attr "length" "3")])
;; 32-bit monitor: a volatile unspec with three SImode register inputs
;; pinned by constraint to the "a", "c" and "d" register classes.  The
;; instruction length is fixed at 3 bytes.
8183 (define_insn "sse3_monitor"
8184 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8185 (match_operand:SI 1 "register_operand" "c")
8186 (match_operand:SI 2 "register_operand" "d")]
8188 "TARGET_SSE3 && !TARGET_64BIT"
8189 "monitor\t%0, %1, %2"
8190 [(set_attr "length" "3")])
;; 64-bit monitor: like the 32-bit pattern, but operand 0 (class "a") is
;; DImode while operands 1 and 2 (classes "c" and "d") stay SImode.  The
;; instruction length is fixed at 3 bytes.
8192 (define_insn "sse3_monitor64"
8193 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8194 (match_operand:SI 1 "register_operand" "c")
8195 (match_operand:SI 2 "register_operand" "d")]
8197 "TARGET_SSE3 && TARGET_64BIT"
8198 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8199 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8200 ;; zero extended to 64bit, we only need to set up 32bit registers.
8202 [(set_attr "length" "3")])
8204 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8206 ;; SSSE3 instructions
8208 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX vphaddw on V8HI.  The RTL selects adjacent element pairs (0,1),
;; (2,3), (4,5), (6,7) of operand 1 and then the same pairs of operand 2
;; (which may be in memory).  The operators combining each pair are not
;; visible in this excerpt.
8210 (define_insn "*avx_phaddwv8hi3"
8211 [(set (match_operand:V8HI 0 "register_operand" "=x")
8217 (match_operand:V8HI 1 "register_operand" "x")
8218 (parallel [(const_int 0)]))
8219 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8221 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8222 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8225 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8226 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8228 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8229 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8234 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8235 (parallel [(const_int 0)]))
8236 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8238 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8239 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8242 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8243 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8245 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8246 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8248 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8249 [(set_attr "type" "sseiadd")
8250 (set_attr "prefix_extra" "1")
8251 (set_attr "prefix" "vex")
8252 (set_attr "mode" "TI")])
;; SSSE3 phaddw on V8HI, two-operand form (operand 1 tied to operand 0).
;; The RTL selects adjacent element pairs (0,1), (2,3), (4,5), (6,7) of
;; operand 1 and then of operand 2 (which may be in memory).  The operators
;; combining each pair are not visible in this excerpt.
8254 (define_insn "ssse3_phaddwv8hi3"
8255 [(set (match_operand:V8HI 0 "register_operand" "=x")
8261 (match_operand:V8HI 1 "register_operand" "0")
8262 (parallel [(const_int 0)]))
8263 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8265 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8266 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8269 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8270 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8272 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8273 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8278 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8279 (parallel [(const_int 0)]))
8280 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8282 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8283 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8286 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8287 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8289 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8290 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8292 "phaddw\t{%2, %0|%0, %2}"
8293 [(set_attr "type" "sseiadd")
8294 (set_attr "atom_unit" "complex")
8295 (set_attr "prefix_data16" "1")
8296 (set_attr "prefix_extra" "1")
8297 (set_attr "mode" "TI")])
;; MMX-register phaddw on V4HI ("y" constraints, mode DI), two-operand form
;; with operand 1 tied to operand 0.  The RTL selects adjacent element pairs
;; (0,1) and (2,3) of operand 1 and then of operand 2.  prefix_rex is
;; computed per insn via x86_extended_reg_mentioned_p.
8299 (define_insn "ssse3_phaddwv4hi3"
8300 [(set (match_operand:V4HI 0 "register_operand" "=y")
8305 (match_operand:V4HI 1 "register_operand" "0")
8306 (parallel [(const_int 0)]))
8307 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8309 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8310 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8314 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8315 (parallel [(const_int 0)]))
8316 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8318 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8319 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8321 "phaddw\t{%2, %0|%0, %2}"
8322 [(set_attr "type" "sseiadd")
8323 (set_attr "atom_unit" "complex")
8324 (set_attr "prefix_extra" "1")
8325 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8326 (set_attr "mode" "DI")])
;; AVX vphaddd on V4SI.  The RTL selects adjacent element pairs (0,1) and
;; (2,3) of operand 1 and then of operand 2 (which may be in memory).  The
;; operators combining each pair are not visible in this excerpt.
8328 (define_insn "*avx_phadddv4si3"
8329 [(set (match_operand:V4SI 0 "register_operand" "=x")
8334 (match_operand:V4SI 1 "register_operand" "x")
8335 (parallel [(const_int 0)]))
8336 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8338 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8339 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8343 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8344 (parallel [(const_int 0)]))
8345 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8347 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8348 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8350 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8351 [(set_attr "type" "sseiadd")
8352 (set_attr "prefix_extra" "1")
8353 (set_attr "prefix" "vex")
8354 (set_attr "mode" "TI")])
;; SSSE3 phaddd on V4SI, two-operand form (operand 1 tied to operand 0).
;; The RTL selects adjacent element pairs (0,1) and (2,3) of operand 1 and
;; then of operand 2 (which may be in memory).
8356 (define_insn "ssse3_phadddv4si3"
8357 [(set (match_operand:V4SI 0 "register_operand" "=x")
8362 (match_operand:V4SI 1 "register_operand" "0")
8363 (parallel [(const_int 0)]))
8364 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8366 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8367 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8371 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8372 (parallel [(const_int 0)]))
8373 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8375 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8376 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8378 "phaddd\t{%2, %0|%0, %2}"
8379 [(set_attr "type" "sseiadd")
8380 (set_attr "atom_unit" "complex")
8381 (set_attr "prefix_data16" "1")
8382 (set_attr "prefix_extra" "1")
8383 (set_attr "mode" "TI")])
;; MMX-register phaddd on V2SI ("y" constraints, mode DI), two-operand form
;; with operand 1 tied to operand 0.  The RTL selects elements 0 and 1 of
;; operand 1 and then of operand 2.  prefix_rex is computed per insn via
;; x86_extended_reg_mentioned_p.
8385 (define_insn "ssse3_phadddv2si3"
8386 [(set (match_operand:V2SI 0 "register_operand" "=y")
8390 (match_operand:V2SI 1 "register_operand" "0")
8391 (parallel [(const_int 0)]))
8392 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8395 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8396 (parallel [(const_int 0)]))
8397 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8399 "phaddd\t{%2, %0|%0, %2}"
8400 [(set_attr "type" "sseiadd")
8401 (set_attr "atom_unit" "complex")
8402 (set_attr "prefix_extra" "1")
8403 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8404 (set_attr "mode" "DI")])
;; AVX vphaddsw on V8HI.  Same operand/selection shape as the vphaddw
;; pattern: adjacent element pairs (0,1) .. (6,7) of operand 1, then of
;; operand 2 (which may be in memory).  The operators combining each pair
;; are not visible in this excerpt.
8406 (define_insn "*avx_phaddswv8hi3"
8407 [(set (match_operand:V8HI 0 "register_operand" "=x")
8413 (match_operand:V8HI 1 "register_operand" "x")
8414 (parallel [(const_int 0)]))
8415 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8417 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8418 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8421 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8422 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8424 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8425 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8430 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8431 (parallel [(const_int 0)]))
8432 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8434 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8435 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8438 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8439 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8441 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8442 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8444 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8445 [(set_attr "type" "sseiadd")
8446 (set_attr "prefix_extra" "1")
8447 (set_attr "prefix" "vex")
8448 (set_attr "mode" "TI")])
;; SSSE3 phaddsw on V8HI, two-operand form (operand 1 tied to operand 0).
;; The RTL selects adjacent element pairs (0,1) .. (6,7) of operand 1 and
;; then of operand 2 (which may be in memory).  The operators combining
;; each pair are not visible in this excerpt.
8450 (define_insn "ssse3_phaddswv8hi3"
8451 [(set (match_operand:V8HI 0 "register_operand" "=x")
8457 (match_operand:V8HI 1 "register_operand" "0")
8458 (parallel [(const_int 0)]))
8459 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8461 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8462 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8465 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8466 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8468 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8469 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8474 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8475 (parallel [(const_int 0)]))
8476 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8478 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8479 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8482 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8483 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8485 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8486 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8488 "phaddsw\t{%2, %0|%0, %2}"
8489 [(set_attr "type" "sseiadd")
8490 (set_attr "atom_unit" "complex")
8491 (set_attr "prefix_data16" "1")
8492 (set_attr "prefix_extra" "1")
8493 (set_attr "mode" "TI")])
;; 64-bit (V4HI) phaddsw using the "y" register class; emits two-operand
;; "phaddsw".  Operand 1 is tied to the destination ("0"); operand 2 may be
;; memory ("ym").  prefix_rex is computed per insn rather than fixed.
8495 (define_insn "ssse3_phaddswv4hi3"
8496 [(set (match_operand:V4HI 0 "register_operand" "=y")
8501 (match_operand:V4HI 1 "register_operand" "0")
8502 (parallel [(const_int 0)]))
8503 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8505 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8506 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8510 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8511 (parallel [(const_int 0)]))
8512 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8514 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8515 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8517 "phaddsw\t{%2, %0|%0, %2}"
8518 [(set_attr "type" "sseiadd")
8519 (set_attr "atom_unit" "complex")
8520 (set_attr "prefix_extra" "1")
8521 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8522 (set_attr "mode" "DI")])
;; AVX variant of the V8HI phsubw pattern; emits three-operand "vphsubw".
;; Operand 1 is a register ("x"), operand 2 register-or-memory ("xm").
;; Lanes 0..7 of operand 1, then of operand 2, are selected in adjacent
;; pairs.
8524 (define_insn "*avx_phsubwv8hi3"
8525 [(set (match_operand:V8HI 0 "register_operand" "=x")
8531 (match_operand:V8HI 1 "register_operand" "x")
8532 (parallel [(const_int 0)]))
8533 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8535 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8536 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8539 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8540 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8542 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8543 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8548 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8549 (parallel [(const_int 0)]))
8550 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8552 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8553 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8556 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8557 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8559 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8560 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8562 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8563 [(set_attr "type" "sseiadd")
8564 (set_attr "prefix_extra" "1")
8565 (set_attr "prefix" "vex")
8566 (set_attr "mode" "TI")])
;; SSSE3 (non-VEX) V8HI phsubw; emits two-operand "phsubw".
;; Operand 1 is tied to the destination via constraint "0", so only
;; %0 and %2 appear in the template.
8568 (define_insn "ssse3_phsubwv8hi3"
8569 [(set (match_operand:V8HI 0 "register_operand" "=x")
8575 (match_operand:V8HI 1 "register_operand" "0")
8576 (parallel [(const_int 0)]))
8577 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8579 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8580 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8583 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8584 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8586 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8587 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8592 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8593 (parallel [(const_int 0)]))
8594 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8596 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8597 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8600 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8601 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8603 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8604 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8606 "phsubw\t{%2, %0|%0, %2}"
8607 [(set_attr "type" "sseiadd")
8608 (set_attr "atom_unit" "complex")
8609 (set_attr "prefix_data16" "1")
8610 (set_attr "prefix_extra" "1")
8611 (set_attr "mode" "TI")])
;; 64-bit (V4HI) phsubw using the "y" register class; emits two-operand
;; "phsubw".  Operand 1 is tied to the destination; operand 2 may be memory.
8613 (define_insn "ssse3_phsubwv4hi3"
8614 [(set (match_operand:V4HI 0 "register_operand" "=y")
8619 (match_operand:V4HI 1 "register_operand" "0")
8620 (parallel [(const_int 0)]))
8621 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8623 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8624 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8628 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8629 (parallel [(const_int 0)]))
8630 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8632 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8633 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8635 "phsubw\t{%2, %0|%0, %2}"
8636 [(set_attr "type" "sseiadd")
8637 (set_attr "atom_unit" "complex")
8638 (set_attr "prefix_extra" "1")
8639 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8640 (set_attr "mode" "DI")])
;; AVX variant of the V4SI phsubd pattern; emits three-operand "vphsubd".
;; Lanes 0..3 of operand 1 (register) and then of operand 2
;; (register or memory) are selected in adjacent pairs.
8642 (define_insn "*avx_phsubdv4si3"
8643 [(set (match_operand:V4SI 0 "register_operand" "=x")
8648 (match_operand:V4SI 1 "register_operand" "x")
8649 (parallel [(const_int 0)]))
8650 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8652 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8653 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8657 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8658 (parallel [(const_int 0)]))
8659 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8661 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8662 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8664 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8665 [(set_attr "type" "sseiadd")
8666 (set_attr "prefix_extra" "1")
8667 (set_attr "prefix" "vex")
8668 (set_attr "mode" "TI")])
;; SSSE3 (non-VEX) V4SI phsubd; emits two-operand "phsubd".
;; Operand 1 is tied to the destination via constraint "0".
8670 (define_insn "ssse3_phsubdv4si3"
8671 [(set (match_operand:V4SI 0 "register_operand" "=x")
8676 (match_operand:V4SI 1 "register_operand" "0")
8677 (parallel [(const_int 0)]))
8678 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8680 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8681 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8685 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8686 (parallel [(const_int 0)]))
8687 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8689 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8690 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8692 "phsubd\t{%2, %0|%0, %2}"
8693 [(set_attr "type" "sseiadd")
8694 (set_attr "atom_unit" "complex")
8695 (set_attr "prefix_data16" "1")
8696 (set_attr "prefix_extra" "1")
8697 (set_attr "mode" "TI")])
;; 64-bit (V2SI) phsubd using the "y" register class; emits two-operand
;; "phsubd".  Each source contributes one pair: lanes (0,1).
8699 (define_insn "ssse3_phsubdv2si3"
8700 [(set (match_operand:V2SI 0 "register_operand" "=y")
8704 (match_operand:V2SI 1 "register_operand" "0")
8705 (parallel [(const_int 0)]))
8706 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8709 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8710 (parallel [(const_int 0)]))
8711 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8713 "phsubd\t{%2, %0|%0, %2}"
8714 [(set_attr "type" "sseiadd")
8715 (set_attr "atom_unit" "complex")
8716 (set_attr "prefix_extra" "1")
8717 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8718 (set_attr "mode" "DI")])
;; AVX variant of the V8HI phsubsw pattern; emits three-operand "vphsubsw".
;; Operand 1 is a register ("x"), operand 2 register-or-memory ("xm").
;; Lanes 0..7 of each source are selected in adjacent pairs.
8720 (define_insn "*avx_phsubswv8hi3"
8721 [(set (match_operand:V8HI 0 "register_operand" "=x")
8727 (match_operand:V8HI 1 "register_operand" "x")
8728 (parallel [(const_int 0)]))
8729 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8731 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8732 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8735 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8736 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8738 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8739 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8744 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8745 (parallel [(const_int 0)]))
8746 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8748 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8749 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8752 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8753 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8755 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8756 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8758 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8759 [(set_attr "type" "sseiadd")
8760 (set_attr "prefix_extra" "1")
8761 (set_attr "prefix" "vex")
8762 (set_attr "mode" "TI")])
;; SSSE3 (non-VEX) V8HI phsubsw; emits two-operand "phsubsw".
;; Operand 1 is tied to the destination via constraint "0".
8764 (define_insn "ssse3_phsubswv8hi3"
8765 [(set (match_operand:V8HI 0 "register_operand" "=x")
8771 (match_operand:V8HI 1 "register_operand" "0")
8772 (parallel [(const_int 0)]))
8773 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8775 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8776 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8779 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8780 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8782 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8783 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8788 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8789 (parallel [(const_int 0)]))
8790 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8792 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8793 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8796 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8797 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8799 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8800 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8802 "phsubsw\t{%2, %0|%0, %2}"
8803 [(set_attr "type" "sseiadd")
8804 (set_attr "atom_unit" "complex")
8805 (set_attr "prefix_data16" "1")
8806 (set_attr "prefix_extra" "1")
8807 (set_attr "mode" "TI")])
;; 64-bit (V4HI) phsubsw using the "y" register class; emits two-operand
;; "phsubsw".  Operand 1 is tied to the destination; operand 2 may be memory.
8809 (define_insn "ssse3_phsubswv4hi3"
8810 [(set (match_operand:V4HI 0 "register_operand" "=y")
8815 (match_operand:V4HI 1 "register_operand" "0")
8816 (parallel [(const_int 0)]))
8817 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8819 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8820 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8824 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8825 (parallel [(const_int 0)]))
8826 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8828 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8829 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8831 "phsubsw\t{%2, %0|%0, %2}"
8832 [(set_attr "type" "sseiadd")
8833 (set_attr "atom_unit" "complex")
8834 (set_attr "prefix_extra" "1")
8835 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8836 (set_attr "mode" "DI")])
;; AVX variant of the 128-bit pmaddubsw pattern; emits three-operand
;; "vpmaddubsw".  Operands 1 and 2 are V16QI; the result is V8HI.
;; The odd-lane selections (indices 1 .. 15) pick eight QI lanes, so the
;; vec_select result mode is V8QI (it was previously written as V16QI,
;; which does not match an eight-element selection).
;; NOTE(review): the even-lane vec_selects should carry the same V8QI
;; mode -- confirm when touching this pattern.
8838 (define_insn "*avx_pmaddubsw128"
8839 [(set (match_operand:V8HI 0 "register_operand" "=x")
8844 (match_operand:V16QI 1 "register_operand" "x")
8845 (parallel [(const_int 0)
8855 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8856 (parallel [(const_int 0)
8866 (vec_select:V8QI (match_dup 1)
8867 (parallel [(const_int 1)
8876 (vec_select:V8QI (match_dup 2)
8877 (parallel [(const_int 1)
8884 (const_int 15)]))))))]
8886 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8887 [(set_attr "type" "sseiadd")
8888 (set_attr "prefix_extra" "1")
8889 (set_attr "prefix" "vex")
8890 (set_attr "mode" "TI")])
;; SSSE3 (non-VEX) 128-bit pmaddubsw; emits two-operand "pmaddubsw".
;; Operand 1 is tied to the destination ("0"); operand 2 may be memory.
;; The odd-lane selections (indices 1 .. 15) pick eight QI lanes, so the
;; vec_select result mode is V8QI (it was previously written as V16QI,
;; which does not match an eight-element selection).
;; NOTE(review): the even-lane vec_selects should carry the same V8QI
;; mode -- confirm when touching this pattern.
8892 (define_insn "ssse3_pmaddubsw128"
8893 [(set (match_operand:V8HI 0 "register_operand" "=x")
8898 (match_operand:V16QI 1 "register_operand" "0")
8899 (parallel [(const_int 0)
8909 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
8910 (parallel [(const_int 0)
8920 (vec_select:V8QI (match_dup 1)
8921 (parallel [(const_int 1)
8930 (vec_select:V8QI (match_dup 2)
8931 (parallel [(const_int 1)
8938 (const_int 15)]))))))]
8940 "pmaddubsw\t{%2, %0|%0, %2}"
8941 [(set_attr "type" "sseiadd")
8942 (set_attr "atom_unit" "simul")
8943 (set_attr "prefix_data16" "1")
8944 (set_attr "prefix_extra" "1")
8945 (set_attr "mode" "TI")])
;; 64-bit pmaddubsw using the "y" register class; emits two-operand
;; "pmaddubsw".  Operands 1 and 2 are V8QI; the result is V4HI.
;; The odd-lane selections (indices 1 .. 7) pick four QI lanes, so the
;; vec_select result mode is V4QI (it was previously written as V8QI,
;; which does not match a four-element selection).
;; NOTE(review): the even-lane vec_selects should carry the same V4QI
;; mode -- confirm when touching this pattern.
8947 (define_insn "ssse3_pmaddubsw"
8948 [(set (match_operand:V4HI 0 "register_operand" "=y")
8953 (match_operand:V8QI 1 "register_operand" "0")
8954 (parallel [(const_int 0)
8960 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
8961 (parallel [(const_int 0)
8967 (vec_select:V4QI (match_dup 1)
8968 (parallel [(const_int 1)
8973 (vec_select:V4QI (match_dup 2)
8974 (parallel [(const_int 1)
8977 (const_int 7)]))))))]
8979 "pmaddubsw\t{%2, %0|%0, %2}"
8980 [(set_attr "type" "sseiadd")
8981 (set_attr "atom_unit" "simul")
8982 (set_attr "prefix_extra" "1")
8983 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8984 (set_attr "mode" "DI")])
;; Expander for the V8HI pmulhrsw pattern.  Both sources are accepted as
;; nonimmediate operands; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, ...) on them before the RTL is
;; generated.  The all-ones V8HI constant vector is part of the pattern.
8986 (define_expand "ssse3_pmulhrswv8hi3"
8987 [(set (match_operand:V8HI 0 "register_operand" "")
8994 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8996 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8998 (const_vector:V8HI [(const_int 1) (const_int 1)
8999 (const_int 1) (const_int 1)
9000 (const_int 1) (const_int 1)
9001 (const_int 1) (const_int 1)]))
9004 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; AVX variant of V8HI pmulhrsw; emits three-operand "vpmulhrsw".
;; The "%" in operand 1's constraint marks operands 1 and 2 as commutative.
;; Requires TARGET_AVX and ix86_binary_operator_ok (MULT, ...).
9006 (define_insn "*avx_pmulhrswv8hi3"
9007 [(set (match_operand:V8HI 0 "register_operand" "=x")
9014 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
9016 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9018 (const_vector:V8HI [(const_int 1) (const_int 1)
9019 (const_int 1) (const_int 1)
9020 (const_int 1) (const_int 1)
9021 (const_int 1) (const_int 1)]))
9023 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9024 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9025 [(set_attr "type" "sseimul")
9026 (set_attr "prefix_extra" "1")
9027 (set_attr "prefix" "vex")
9028 (set_attr "mode" "TI")])
;; SSSE3 (non-VEX) V8HI pmulhrsw; emits two-operand "pmulhrsw".
;; Operand 1 is commutative with operand 2 ("%") and tied to the
;; destination ("0").  Requires TARGET_SSSE3 and
;; ix86_binary_operator_ok (MULT, ...).
9030 (define_insn "*ssse3_pmulhrswv8hi3"
9031 [(set (match_operand:V8HI 0 "register_operand" "=x")
9038 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
9040 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9042 (const_vector:V8HI [(const_int 1) (const_int 1)
9043 (const_int 1) (const_int 1)
9044 (const_int 1) (const_int 1)
9045 (const_int 1) (const_int 1)]))
9047 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9048 "pmulhrsw\t{%2, %0|%0, %2}"
9049 [(set_attr "type" "sseimul")
9050 (set_attr "prefix_data16" "1")
9051 (set_attr "prefix_extra" "1")
9052 (set_attr "mode" "TI")])
;; Expander for the V4HI pmulhrsw pattern.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands) before the
;; RTL is generated.  The all-ones V4HI constant vector is part of the
;; pattern.
9054 (define_expand "ssse3_pmulhrswv4hi3"
9055 [(set (match_operand:V4HI 0 "register_operand" "")
9062 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9064 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9066 (const_vector:V4HI [(const_int 1) (const_int 1)
9067 (const_int 1) (const_int 1)]))
9070 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; 64-bit (V4HI) pmulhrsw using the "y" register class; emits two-operand
;; "pmulhrsw".  Operand 1 is commutative ("%") and tied to the destination.
;; Requires TARGET_SSSE3 and ix86_binary_operator_ok (MULT, ...).
9072 (define_insn "*ssse3_pmulhrswv4hi3"
9073 [(set (match_operand:V4HI 0 "register_operand" "=y")
9080 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9082 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9084 (const_vector:V4HI [(const_int 1) (const_int 1)
9085 (const_int 1) (const_int 1)]))
9087 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9088 "pmulhrsw\t{%2, %0|%0, %2}"
9089 [(set_attr "type" "sseimul")
9090 (set_attr "prefix_extra" "1")
9091 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9092 (set_attr "mode" "DI")])
;; AVX variant of the V16QI pshufb unspec; emits three-operand "vpshufb".
;; Operand 1 is a register ("x"), operand 2 register-or-memory ("xm").
;; The output template is a plain string: no trailing ';' is needed (in
;; .md syntax ';' merely starts a comment).
9094 (define_insn "*avx_pshufbv16qi3"
9095 [(set (match_operand:V16QI 0 "register_operand" "=x")
9096 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9097 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9100 "vpshufb\t{%2, %1, %0|%0, %1, %2}"
9101 [(set_attr "type" "sselog1")
9102 (set_attr "prefix_extra" "1")
9103 (set_attr "prefix" "vex")
9104 (set_attr "mode" "TI")])
;; SSSE3 (non-VEX) V16QI pshufb unspec; emits two-operand "pshufb".
;; Operand 1 is tied to the destination via constraint "0".
;; The output template is a plain string: no trailing ';' is needed (in
;; .md syntax ';' merely starts a comment).
9106 (define_insn "ssse3_pshufbv16qi3"
9107 [(set (match_operand:V16QI 0 "register_operand" "=x")
9108 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9109 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9112 "pshufb\t{%2, %0|%0, %2}"
9113 [(set_attr "type" "sselog1")
9114 (set_attr "prefix_data16" "1")
9115 (set_attr "prefix_extra" "1")
9116 (set_attr "mode" "TI")])
;; 64-bit (V8QI) pshufb unspec using the "y" register class; emits
;; two-operand "pshufb".  Operand 1 is tied to the destination.
;; The output template is a plain string: no trailing ';' is needed (in
;; .md syntax ';' merely starts a comment).
9118 (define_insn "ssse3_pshufbv8qi3"
9119 [(set (match_operand:V8QI 0 "register_operand" "=y")
9120 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9121 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9124 "pshufb\t{%2, %0|%0, %2}"
9125 [(set_attr "type" "sselog1")
9126 (set_attr "prefix_extra" "1")
9127 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9128 (set_attr "mode" "DI")])
;; AVX variant of psign over the SSEMODE124 modes; emits three-operand
;; "vpsign<ssevecsize>".  Operand 1 is a register, operand 2
;; register-or-memory.
;; The output template is a plain string: no trailing ';' is needed (in
;; .md syntax ';' merely starts a comment).
9130 (define_insn "*avx_psign<mode>3"
9131 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9133 [(match_operand:SSEMODE124 1 "register_operand" "x")
9134 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9137 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
9138 [(set_attr "type" "sselog1")
9139 (set_attr "prefix_extra" "1")
9140 (set_attr "prefix" "vex")
9141 (set_attr "mode" "TI")])
;; SSSE3 (non-VEX) psign over the SSEMODE124 modes; emits two-operand
;; "psign<ssevecsize>".  Operand 1 is tied to the destination ("0").
;; The output template is a plain string: no trailing ';' is needed (in
;; .md syntax ';' merely starts a comment).
9143 (define_insn "ssse3_psign<mode>3"
9144 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9146 [(match_operand:SSEMODE124 1 "register_operand" "0")
9147 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9150 "psign<ssevecsize>\t{%2, %0|%0, %2}"
9151 [(set_attr "type" "sselog1")
9152 (set_attr "prefix_data16" "1")
9153 (set_attr "prefix_extra" "1")
9154 (set_attr "mode" "TI")])
;; psign over the MMXMODEI modes using the "y" register class; emits
;; two-operand "psign<mmxvecsize>".  Operand 1 is tied to the destination.
;; The output template is a plain string: no trailing ';' is needed (in
;; .md syntax ';' merely starts a comment).
9156 (define_insn "ssse3_psign<mode>3"
9157 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9159 [(match_operand:MMXMODEI 1 "register_operand" "0")
9160 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9163 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
9164 [(set_attr "type" "sselog1")
9165 (set_attr "prefix_extra" "1")
9166 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9167 (set_attr "mode" "DI")])
;; AVX variant of the TImode palignr unspec; emits "vpalignr" with an
;; immediate.  Operand 3 must satisfy const_0_to_255_mul_8_operand; the
;; output code divides it by 8 before printing (presumably converting a
;; bit count to the byte count the instruction takes -- confirm).
9169 (define_insn "*avx_palignrti"
9170 [(set (match_operand:TI 0 "register_operand" "=x")
9171 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9172 (match_operand:TI 2 "nonimmediate_operand" "xm")
9173 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9177 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9178 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9180 [(set_attr "type" "sseishft")
9181 (set_attr "prefix_extra" "1")
9182 (set_attr "length_immediate" "1")
9183 (set_attr "prefix" "vex")
9184 (set_attr "mode" "TI")])
;; SSSE3 (non-VEX) TImode palignr unspec; emits two-operand "palignr" with
;; an immediate.  Operand 1 is tied to the destination.  Operand 3 is
;; divided by 8 before printing (presumably bits -> bytes -- confirm).
9186 (define_insn "ssse3_palignrti"
9187 [(set (match_operand:TI 0 "register_operand" "=x")
9188 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9189 (match_operand:TI 2 "nonimmediate_operand" "xm")
9190 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9194 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9195 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9197 [(set_attr "type" "sseishft")
9198 (set_attr "atom_unit" "sishuf")
9199 (set_attr "prefix_data16" "1")
9200 (set_attr "prefix_extra" "1")
9201 (set_attr "length_immediate" "1")
9202 (set_attr "mode" "TI")])
;; DImode palignr unspec using the "y" register class; emits two-operand
;; "palignr" with an immediate.  Operand 3 is divided by 8 before printing
;; (presumably bits -> bytes -- confirm).
9204 (define_insn "ssse3_palignrdi"
9205 [(set (match_operand:DI 0 "register_operand" "=y")
9206 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9207 (match_operand:DI 2 "nonimmediate_operand" "ym")
9208 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9212 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9213 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9215 [(set_attr "type" "sseishft")
9216 (set_attr "atom_unit" "sishuf")
9217 (set_attr "prefix_extra" "1")
9218 (set_attr "length_immediate" "1")
9219 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9220 (set_attr "mode" "DI")])
;; Vector absolute value (RTL "abs") over the SSEMODE124 modes; emits
;; "pabs<ssevecsize>".  The template starts with "%v" and the "prefix"
;; attribute is "maybe_vex" (presumably a "v" is prepended when AVX is
;; in use -- confirm against the operand-printing code).
9222 (define_insn "abs<mode>2"
9223 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9224 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9226 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9227 [(set_attr "type" "sselog1")
9228 (set_attr "prefix_data16" "1")
9229 (set_attr "prefix_extra" "1")
9230 (set_attr "prefix" "maybe_vex")
9231 (set_attr "mode" "TI")])
;; Vector absolute value (RTL "abs") over the MMXMODEI modes using the "y"
;; register class; emits "pabs<mmxvecsize>".  prefix_rep is forced to 0.
;; The output template is a plain string: no trailing ';' is needed (in
;; .md syntax ';' merely starts a comment).
9233 (define_insn "abs<mode>2"
9234 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9235 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9237 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
9238 [(set_attr "type" "sselog1")
9239 (set_attr "prefix_rep" "0")
9240 (set_attr "prefix_extra" "1")
9241 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9242 (set_attr "mode" "DI")])
9244 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9246 ;; AMD SSE4A instructions
9248 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A scalar store over the MODEF modes: operand 0 is memory,
;; operand 1 an SSE register.  Emits "movnts<ssemodefsuffix>".
9250 (define_insn "sse4a_movnt<mode>"
9251 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9253 [(match_operand:MODEF 1 "register_operand" "x")]
9256 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9257 [(set_attr "type" "ssemov")
9258 (set_attr "mode" "<MODE>")])
;; SSE4A store of element 0 of an SSEMODEF2P vector register (operand 1)
;; to a scalar memory location (operand 0).
;; Emits "movnt<ssescalarmodesuffix>".
9260 (define_insn "sse4a_vmmovnt<mode>"
9261 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9262 (unspec:<ssescalarmode>
9263 [(vec_select:<ssescalarmode>
9264 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9265 (parallel [(const_int 0)]))]
9268 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9269 [(set_attr "type" "ssemov")
9270 (set_attr "mode" "<ssescalarmode>")])
;; SSE4A "extrq" with two integer-constant operands (2 and 3).
;; Operand 1 is tied to the destination.  length_immediate is 2, matching
;; the two constants printed in the template.
9272 (define_insn "sse4a_extrqi"
9273 [(set (match_operand:V2DI 0 "register_operand" "=x")
9274 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9275 (match_operand 2 "const_int_operand" "")
9276 (match_operand 3 "const_int_operand" "")]
9279 "extrq\t{%3, %2, %0|%0, %2, %3}"
9280 [(set_attr "type" "sse")
9281 (set_attr "prefix_data16" "1")
9282 (set_attr "length_immediate" "2")
9283 (set_attr "mode" "TI")])
;; SSE4A "extrq", register form: operand 2 is a V16QI register instead of
;; immediates.  Operand 1 is tied to the destination.
9285 (define_insn "sse4a_extrq"
9286 [(set (match_operand:V2DI 0 "register_operand" "=x")
9287 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9288 (match_operand:V16QI 2 "register_operand" "x")]
9291 "extrq\t{%2, %0|%0, %2}"
9292 [(set_attr "type" "sse")
9293 (set_attr "prefix_data16" "1")
9294 (set_attr "mode" "TI")])
;; SSE4A "insertq" with two integer-constant operands (3 and 4).
;; Operand 1 is tied to the destination; operand 2 is a register.
;; The attributes set prefix_rep to 1 and prefix_data16 to 0, and
;; length_immediate to 2 for the two constants.
9296 (define_insn "sse4a_insertqi"
9297 [(set (match_operand:V2DI 0 "register_operand" "=x")
9298 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9299 (match_operand:V2DI 2 "register_operand" "x")
9300 (match_operand 3 "const_int_operand" "")
9301 (match_operand 4 "const_int_operand" "")]
9304 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9305 [(set_attr "type" "sseins")
9306 (set_attr "prefix_data16" "0")
9307 (set_attr "prefix_rep" "1")
9308 (set_attr "length_immediate" "2")
9309 (set_attr "mode" "TI")])
;; SSE4A "insertq", register-only form (no immediates).
;; Operand 1 is tied to the destination; operand 2 is a register.
9311 (define_insn "sse4a_insertq"
9312 [(set (match_operand:V2DI 0 "register_operand" "=x")
9313 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9314 (match_operand:V2DI 2 "register_operand" "x")]
9317 "insertq\t{%2, %0|%0, %2}"
9318 [(set_attr "type" "sseins")
9319 (set_attr "prefix_data16" "0")
9320 (set_attr "prefix_rep" "1")
9321 (set_attr "mode" "TI")])
9323 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9325 ;; Intel SSE4.1 instructions
9327 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX immediate blend over the AVXMODEF2P modes, expressed as vec_merge.
;; Note the operand order inside vec_merge: operand 2 comes first and
;; operand 1 second; operand 3 is the constant mask.
;; Emits "vblend<ssemodesuffix>".
9329 (define_insn "avx_blend<ssemodesuffix><avxmodesuffix>"
9330 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9331 (vec_merge:AVXMODEF2P
9332 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9333 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9334 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9336 "vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9337 [(set_attr "type" "ssemov")
9338 (set_attr "prefix_extra" "1")
9339 (set_attr "length_immediate" "1")
9340 (set_attr "prefix" "vex")
9341 (set_attr "mode" "<avxvecmode>")])
;; AVX variable blend over the AVXMODEF2P modes; emits four-operand
;; "vblendv<ssemodesuffix>" where operand 3 is a register.
;; NOTE(review): length_immediate is 1 although no constant operand is
;; present -- presumably the fourth register is encoded in an immediate
;; byte; confirm against the instruction encoding.
9343 (define_insn "avx_blendv<ssemodesuffix><avxmodesuffix>"
9344 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9346 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9347 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9348 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9351 "vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9352 [(set_attr "type" "ssemov")
9353 (set_attr "prefix_extra" "1")
9354 (set_attr "length_immediate" "1")
9355 (set_attr "prefix" "vex")
9356 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 immediate blend over the SSEMODEF2P modes, expressed as
;; vec_merge (operand 2 first, operand 1 second, constant mask third).
;; Operand 1 is tied to the destination.  Emits "blend<ssemodesuffix>".
9358 (define_insn "sse4_1_blend<ssemodesuffix>"
9359 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9360 (vec_merge:SSEMODEF2P
9361 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9362 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9363 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9365 "blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9366 [(set_attr "type" "ssemov")
9367 (set_attr "prefix_data16" "1")
9368 (set_attr "prefix_extra" "1")
9369 (set_attr "length_immediate" "1")
9370 (set_attr "mode" "<MODE>")])
;; SSE4.1 variable blend over the SSEMODEF2P modes; emits
;; "blendv<ssemodesuffix>".  Operands 0, 1 and 2 use the *_not_xmm0
;; predicates, while operand 3 uses constraint "Yz" (presumably the xmm0
;; register that the instruction reads implicitly -- confirm in the
;; constraint definitions).
9372 (define_insn "sse4_1_blendv<ssemodesuffix>"
9373 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9375 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9376 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9377 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9380 "blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9381 [(set_attr "type" "ssemov")
9382 (set_attr "prefix_data16" "1")
9383 (set_attr "prefix_extra" "1")
9384 (set_attr "mode" "<MODE>")])
;; AVX "vdp<ssemodesuffix>" over the AVXMODEF2P modes with an 8-bit
;; immediate (operand 3, range 0..255).  Operands 1 and 2 are marked
;; commutative by the "%" in operand 1's constraint.
9386 (define_insn "avx_dp<ssemodesuffix><avxmodesuffix>"
9387 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9389 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9390 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9391 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9394 "vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9395 [(set_attr "type" "ssemul")
9396 (set_attr "prefix" "vex")
9397 (set_attr "prefix_extra" "1")
9398 (set_attr "length_immediate" "1")
9399 (set_attr "mode" "<avxvecmode>")])
;; SSE4.1 "dp<ssemodesuffix>" over the SSEMODEF2P modes with an 8-bit
;; immediate (operand 3).  Operand 1 is commutative with operand 2 ("%")
;; and tied to the destination ("0").
9401 (define_insn "sse4_1_dp<ssemodesuffix>"
9402 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9404 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9405 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9406 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9409 "dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9410 [(set_attr "type" "ssemul")
9411 (set_attr "prefix_data16" "1")
9412 (set_attr "prefix_extra" "1")
9413 (set_attr "length_immediate" "1")
9414 (set_attr "mode" "<MODE>")])
;; SSE4.1 "movntdqa": loads a V2DI value from memory (operand 1 must be a
;; memory operand) into an SSE register.  The template starts with "%v"
;; and the "prefix" attribute is "maybe_vex".
9416 (define_insn "sse4_1_movntdqa"
9417 [(set (match_operand:V2DI 0 "register_operand" "=x")
9418 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9421 "%vmovntdqa\t{%1, %0|%0, %1}"
9422 [(set_attr "type" "ssemov")
9423 (set_attr "prefix_extra" "1")
9424 (set_attr "prefix" "maybe_vex")
9425 (set_attr "mode" "TI")])
;; AVX variant of the V16QI mpsadbw unspec; emits "vmpsadbw" with an 8-bit
;; immediate (operand 3).  Operand 1 is a register, operand 2
;; register-or-memory.
9427 (define_insn "*avx_mpsadbw"
9428 [(set (match_operand:V16QI 0 "register_operand" "=x")
9429 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9430 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9431 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9434 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9435 [(set_attr "type" "sselog1")
9436 (set_attr "prefix" "vex")
9437 (set_attr "prefix_extra" "1")
9438 (set_attr "length_immediate" "1")
9439 (set_attr "mode" "TI")])
;; SSE4.1 (non-VEX) V16QI mpsadbw unspec; emits two-operand "mpsadbw" with
;; an 8-bit immediate (operand 3).  Operand 1 is tied to the destination.
9441 (define_insn "sse4_1_mpsadbw"
9442 [(set (match_operand:V16QI 0 "register_operand" "=x")
9443 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9444 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9445 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9448 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9449 [(set_attr "type" "sselog1")
9450 (set_attr "prefix_extra" "1")
9451 (set_attr "length_immediate" "1")
9452 (set_attr "mode" "TI")])
;; AVX variant of packusdw: two V4SI sources produce one V8HI result.
;; Emits three-operand "vpackusdw".
9454 (define_insn "*avx_packusdw"
9455 [(set (match_operand:V8HI 0 "register_operand" "=x")
9458 (match_operand:V4SI 1 "register_operand" "x"))
9460 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9462 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9463 [(set_attr "type" "sselog")
9464 (set_attr "prefix_extra" "1")
9465 (set_attr "prefix" "vex")
9466 (set_attr "mode" "TI")])
;; SSE4.1 (non-VEX) packusdw: two V4SI sources produce one V8HI result.
;; Operand 1 is tied to the destination.  Emits two-operand "packusdw".
9468 (define_insn "sse4_1_packusdw"
9469 [(set (match_operand:V8HI 0 "register_operand" "=x")
9472 (match_operand:V4SI 1 "register_operand" "0"))
9474 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9476 "packusdw\t{%2, %0|%0, %2}"
9477 [(set_attr "type" "sselog")
9478 (set_attr "prefix_extra" "1")
9479 (set_attr "mode" "TI")])
;; AVX variant of the V16QI pblendvb unspec; emits four-operand
;; "vpblendvb" where operand 3 is a register.
;; NOTE(review): length_immediate is 1 although no constant operand is
;; present -- presumably the fourth register is encoded in an immediate
;; byte; confirm against the instruction encoding.
9481 (define_insn "*avx_pblendvb"
9482 [(set (match_operand:V16QI 0 "register_operand" "=x")
9483 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9484 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9485 (match_operand:V16QI 3 "register_operand" "x")]
9488 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9489 [(set_attr "type" "ssemov")
9490 (set_attr "prefix_extra" "1")
9491 (set_attr "length_immediate" "1")
9492 (set_attr "prefix" "vex")
9493 (set_attr "mode" "TI")])
;; SSE4.1 (non-VEX) V16QI pblendvb unspec; emits "pblendvb".
;; Operands 0, 1 and 2 use the *_not_xmm0 predicates, while operand 3 uses
;; constraint "Yz" (presumably xmm0 -- confirm in the constraint
;; definitions).
9495 (define_insn "sse4_1_pblendvb"
9496 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9497 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9498 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9499 (match_operand:V16QI 3 "register_operand" "Yz")]
9502 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9503 [(set_attr "type" "ssemov")
9504 (set_attr "prefix_extra" "1")
9505 (set_attr "mode" "TI")])
;; AVX variant of V8HI pblendw; emits "vpblendw" with an 8-bit immediate
;; mask (operand 3).  In the RTL, operand 2 is listed before operand 1.
9507 (define_insn "*avx_pblendw"
9508 [(set (match_operand:V8HI 0 "register_operand" "=x")
9510 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9511 (match_operand:V8HI 1 "register_operand" "x")
9512 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9514 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9515 [(set_attr "type" "ssemov")
9516 (set_attr "prefix" "vex")
9517 (set_attr "prefix_extra" "1")
9518 (set_attr "length_immediate" "1")
9519 (set_attr "mode" "TI")])
;; SSE4.1 (non-VEX) V8HI pblendw; emits two-operand "pblendw" with an
;; 8-bit immediate mask (operand 3).  Operand 1 is tied to the destination;
;; in the RTL, operand 2 is listed before operand 1.
9521 (define_insn "sse4_1_pblendw"
9522 [(set (match_operand:V8HI 0 "register_operand" "=x")
9524 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9525 (match_operand:V8HI 1 "register_operand" "0")
9526 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9528 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9529 [(set_attr "type" "ssemov")
9530 (set_attr "prefix_extra" "1")
9531 (set_attr "length_immediate" "1")
9532 (set_attr "mode" "TI")])
;; SSE4.1 "phminposuw", modelled as UNSPEC_PHMINPOSUW on a single V8HI
;; source (register or memory).  The template starts with "%v" and the
;; "prefix" attribute is "maybe_vex".
9534 (define_insn "sse4_1_phminposuw"
9535 [(set (match_operand:V8HI 0 "register_operand" "=x")
9536 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9537 UNSPEC_PHMINPOSUW))]
9539 "%vphminposuw\t{%1, %0|%0, %1}"
9540 [(set_attr "type" "sselog1")
9541 (set_attr "prefix_extra" "1")
9542 (set_attr "prefix" "maybe_vex")
9543 (set_attr "mode" "TI")])
;; SSE4.1 byte -> word extension, V16QI source to V8HI result.
;; <extsuffix> is "sx" for sign_extend and "zx" for zero_extend, so this
;; emits "pmovsxbw" / "pmovzxbw" (with the optional "%v" prefix).
9545 (define_insn "sse4_1_<code>v8qiv8hi2"
9546 [(set (match_operand:V8HI 0 "register_operand" "=x")
9549 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9550 (parallel [(const_int 0)
9559 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9560 [(set_attr "type" "ssemov")
9561 (set_attr "prefix_extra" "1")
9562 (set_attr "prefix" "maybe_vex")
9563 (set_attr "mode" "TI")])
;; SSE4.1 byte -> doubleword extension, V16QI source to V4SI result.
;; Emits "pmovsxbd" / "pmovzxbd" depending on <extsuffix>.
9565 (define_insn "sse4_1_<code>v4qiv4si2"
9566 [(set (match_operand:V4SI 0 "register_operand" "=x")
9569 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9570 (parallel [(const_int 0)
9575 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9576 [(set_attr "type" "ssemov")
9577 (set_attr "prefix_extra" "1")
9578 (set_attr "prefix" "maybe_vex")
9579 (set_attr "mode" "TI")])
;; SSE4.1 word -> doubleword extension, V8HI source to V4SI result.
;; Emits "pmovsxwd" / "pmovzxwd" depending on <extsuffix>.
9581 (define_insn "sse4_1_<code>v4hiv4si2"
9582 [(set (match_operand:V4SI 0 "register_operand" "=x")
9585 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9586 (parallel [(const_int 0)
9591 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9592 [(set_attr "type" "ssemov")
9593 (set_attr "prefix_extra" "1")
9594 (set_attr "prefix" "maybe_vex")
9595 (set_attr "mode" "TI")])
;; SSE4.1 byte -> quadword extension, V16QI source to V2DI result.
;; Emits "pmovsxbq" / "pmovzxbq" depending on <extsuffix>.
9597 (define_insn "sse4_1_<code>v2qiv2di2"
9598 [(set (match_operand:V2DI 0 "register_operand" "=x")
9601 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9602 (parallel [(const_int 0)
9605 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9606 [(set_attr "type" "ssemov")
9607 (set_attr "prefix_extra" "1")
9608 (set_attr "prefix" "maybe_vex")
9609 (set_attr "mode" "TI")])
;; Word -> quadword extension: elements selected from the V8HI operand 1
;; (selector starts at index 0) are extended into a V2DI result.
;; <extsuffix> expands to "sx" or "zx", giving pmovsxwq / pmovzxwq.
9611 (define_insn "sse4_1_<code>v2hiv2di2"
9612 [(set (match_operand:V2DI 0 "register_operand" "=x")
9615 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9616 (parallel [(const_int 0)
9619 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9620 [(set_attr "type" "ssemov")
9621 (set_attr "prefix_extra" "1")
9622 (set_attr "prefix" "maybe_vex")
9623 (set_attr "mode" "TI")])
;; Doubleword -> quadword extension: elements selected from the V4SI
;; operand 1 (selector starts at index 0) are extended into a V2DI result.
;; <extsuffix> expands to "sx" or "zx", giving pmovsxdq / pmovzxdq.
9625 (define_insn "sse4_1_<code>v2siv2di2"
9626 [(set (match_operand:V2DI 0 "register_operand" "=x")
9629 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9630 (parallel [(const_int 0)
9633 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9634 [(set_attr "type" "ssemov")
9635 (set_attr "prefix_extra" "1")
9636 (set_attr "prefix" "maybe_vex")
9637 (set_attr "mode" "TI")])
9639 ;; vtestps/vtestpd are very similar to comiss and ucomiss in that they
9640 ;; set FLAGS_REG, but they are not really compare instructions.
;; AVX vtest<ssemodesuffix>: sets FLAGS_REG (mode CC) from an unspec of two
;; AVXMODEF2P operands.  Operand 0 must be a register; operand 1 may be a
;; register or memory.  There is no register result.
9641 (define_insn "avx_vtest<ssemodesuffix><avxmodesuffix>"
9642 [(set (reg:CC FLAGS_REG)
9643 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9644 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9647 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9648 [(set_attr "type" "ssecomi")
9649 (set_attr "prefix_extra" "1")
9650 (set_attr "prefix" "vex")
9651 (set_attr "mode" "<MODE>")])
9653 ;; ptest is very similar to comiss and ucomiss in that it sets FLAGS_REG,
9654 ;; but it is not really a compare instruction.
;; 256-bit vptest: sets FLAGS_REG (mode CC) from an unspec of two V4DI
;; operands.  Operand 0 is a register, operand 1 a register or memory.
;; Always VEX-encoded (prefix "vex"), insn mode OI.
9655 (define_insn "avx_ptest256"
9656 [(set (reg:CC FLAGS_REG)
9657 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9658 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9661 "vptest\t{%1, %0|%0, %1}"
9662 [(set_attr "type" "ssecomi")
9663 (set_attr "prefix_extra" "1")
9664 (set_attr "prefix" "vex")
9665 (set_attr "mode" "OI")])
;; 128-bit ptest: sets FLAGS_REG (mode CC) from an unspec of two V2DI
;; operands.  Operand 0 is a register, operand 1 a register or memory.
;; Uses the "%v" template prefix with prefix attribute "maybe_vex".
9667 (define_insn "sse4_1_ptest"
9668 [(set (reg:CC FLAGS_REG)
9669 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9670 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9673 "%vptest\t{%1, %0|%0, %1}"
9674 [(set_attr "type" "ssecomi")
9675 (set_attr "prefix_extra" "1")
9676 (set_attr "prefix" "maybe_vex")
9677 (set_attr "mode" "TI")])
;; 256-bit packed vround: an unspec over operand 1 (register or memory)
;; and operand 2, an immediate accepted by const_0_to_15_operand.
;; Always VEX-encoded.
9679 (define_insn "avx_round<ssemodesuffix>256"
9680 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9681 (unspec:AVX256MODEF2P
9682 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9683 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9686 "vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9687 [(set_attr "type" "ssecvt")
9688 (set_attr "prefix_extra" "1")
9689 (set_attr "length_immediate" "1")
9690 (set_attr "prefix" "vex")
9691 (set_attr "mode" "<MODE>")])
;; 128-bit packed round: operand 1 (register or memory) with operand 2, an
;; immediate accepted by const_0_to_15_operand.  The "%v" template prefix
;; goes with the "maybe_vex" prefix attribute.
9693 (define_insn "sse4_1_round<ssemodesuffix>"
9694 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9696 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9697 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9700 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9701 [(set_attr "type" "ssecvt")
9702 (set_attr "prefix_data16" "1")
9703 (set_attr "prefix_extra" "1")
9704 (set_attr "length_immediate" "1")
9705 (set_attr "prefix" "maybe_vex")
9706 (set_attr "mode" "<MODE>")])
;; AVX scalar vround: a vec_merge of the rounded operand 2 (immediate in
;; operand 3) with operand 1.  All vector inputs are registers and the
;; destination is independent of operand 1 (non-destructive VEX form).
9708 (define_insn "*avx_round<ssescalarmodesuffix>"
9709 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9710 (vec_merge:SSEMODEF2P
9712 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9713 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9715 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9718 "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9719 [(set_attr "type" "ssecvt")
9720 (set_attr "prefix_extra" "1")
9721 (set_attr "length_immediate" "1")
9722 (set_attr "prefix" "vex")
9723 (set_attr "mode" "<MODE>")])
;; SSE4.1 scalar round: a vec_merge of the rounded operand 2 (immediate in
;; operand 3) with operand 1.  Operand 1 is tied to the destination
;; (constraint "0"), so the template prints only %0, %2 and %3.
9725 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9726 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9727 (vec_merge:SSEMODEF2P
9729 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9730 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9732 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9735 "round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9736 [(set_attr "type" "ssecvt")
9737 (set_attr "prefix_data16" "1")
9738 (set_attr "prefix_extra" "1")
9739 (set_attr "length_immediate" "1")
9740 (set_attr "mode" "<MODE>")])
9742 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9744 ;; Intel SSE4.2 string/text processing instructions
9746 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three outputs: an SI result in operand 0
;; (constraint "c"), a V16QI result in operand 1 (constraint "Yz") and
;; FLAGS_REG.  Inputs are two vectors (operands 2 and 4), their lengths in
;; operands 3 ("a") and 5 ("d"), and the control immediate in operand 6.
;; It is split while pseudos can still be created.  The split code looks at
;; the REG_UNUSED notes on the current insn to see which outputs are live,
;; and can emit sse4_2_pcmpestri (for operand 0) and sse4_2_pcmpestrm (for
;; operand 1).  The flags-only sse4_2_pcmpestr_cconly is emitted only when
;; the flags are live and neither register result is.
9748 (define_insn_and_split "sse4_2_pcmpestr"
9749 [(set (match_operand:SI 0 "register_operand" "=c,c")
9751 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9752 (match_operand:SI 3 "register_operand" "a,a")
9753 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9754 (match_operand:SI 5 "register_operand" "d,d")
9755 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9757 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9765 (set (reg:CC FLAGS_REG)
9774 && can_create_pseudo_p ()"
9779 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9780 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9781 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9784 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9785 operands[3], operands[4],
9786 operands[5], operands[6]));
9788 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9789 operands[3], operands[4],
9790 operands[5], operands[6]));
9791 if (flags && !(ecx || xmm0))
9792 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9793 operands[2], operands[3],
9794 operands[4], operands[5],
9798 [(set_attr "type" "sselog")
9799 (set_attr "prefix_data16" "1")
9800 (set_attr "prefix_extra" "1")
9801 (set_attr "length_immediate" "1")
9802 (set_attr "memory" "none,load")
9803 (set_attr "mode" "TI")])
;; pcmpestri: SI result in operand 0 (constraint "c"), plus FLAGS_REG.
;; Operands 1 and 3 are the vectors (3 may be memory, second alternative),
;; operands 2 ("a") and 4 ("d") the lengths, operand 5 the control
;; immediate.  The length registers are implicit and not printed.
9805 (define_insn "sse4_2_pcmpestri"
9806 [(set (match_operand:SI 0 "register_operand" "=c,c")
9808 [(match_operand:V16QI 1 "register_operand" "x,x")
9809 (match_operand:SI 2 "register_operand" "a,a")
9810 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9811 (match_operand:SI 4 "register_operand" "d,d")
9812 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9814 (set (reg:CC FLAGS_REG)
9823 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9824 [(set_attr "type" "sselog")
9825 (set_attr "prefix_data16" "1")
9826 (set_attr "prefix_extra" "1")
9827 (set_attr "prefix" "maybe_vex")
9828 (set_attr "length_immediate" "1")
9829 (set_attr "memory" "none,load")
9830 (set_attr "mode" "TI")])
;; pcmpestrm: V16QI result in operand 0 (constraint "Yz"), plus FLAGS_REG.
;; Operands 1 and 3 are the vectors (3 may be memory, second alternative),
;; operands 2 ("a") and 4 ("d") the lengths, operand 5 the control
;; immediate.  The length registers are implicit and not printed.
9832 (define_insn "sse4_2_pcmpestrm"
9833 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9835 [(match_operand:V16QI 1 "register_operand" "x,x")
9836 (match_operand:SI 2 "register_operand" "a,a")
9837 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9838 (match_operand:SI 4 "register_operand" "d,d")
9839 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9841 (set (reg:CC FLAGS_REG)
9850 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9851 [(set_attr "type" "sselog")
9852 (set_attr "prefix_data16" "1")
9853 (set_attr "prefix_extra" "1")
9854 (set_attr "length_immediate" "1")
9855 (set_attr "prefix" "maybe_vex")
9856 (set_attr "memory" "none,load")
9857 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: only FLAGS_REG is set; operands 0 and 1 are
;; clobbered scratches.  Alternatives 0/1 clobber the "Yz" vector scratch
;; and emit pcmpestrm; alternatives 2/3 clobber the "c" integer scratch and
;; emit pcmpestri.  Within each pair the second alternative takes
;; operand 4 from memory (see the "memory" attribute).
9859 (define_insn "sse4_2_pcmpestr_cconly"
9860 [(set (reg:CC FLAGS_REG)
9862 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9863 (match_operand:SI 3 "register_operand" "a,a,a,a")
9864 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9865 (match_operand:SI 5 "register_operand" "d,d,d,d")
9866 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9868 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9869 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9872 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9873 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9874 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9875 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9876 [(set_attr "type" "sselog")
9877 (set_attr "prefix_data16" "1")
9878 (set_attr "prefix_extra" "1")
9879 (set_attr "length_immediate" "1")
9880 (set_attr "memory" "none,load,none,load")
9881 (set_attr "prefix" "maybe_vex")
9882 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three outputs: an SI result in operand 0
;; (constraint "c"), a V16QI result in operand 1 (constraint "Yz") and
;; FLAGS_REG.  Inputs are two vectors (operands 2 and 3) and the control
;; immediate in operand 4; there are no explicit length operands.
;; It is split while pseudos can still be created.  The split code looks at
;; the REG_UNUSED notes on the current insn to see which outputs are live,
;; and can emit sse4_2_pcmpistri (for operand 0) and sse4_2_pcmpistrm (for
;; operand 1).  The flags-only sse4_2_pcmpistr_cconly is emitted only when
;; the flags are live and neither register result is.
9884 (define_insn_and_split "sse4_2_pcmpistr"
9885 [(set (match_operand:SI 0 "register_operand" "=c,c")
9887 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9888 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
9889 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9891 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9897 (set (reg:CC FLAGS_REG)
9904 && can_create_pseudo_p ()"
9909 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9910 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9911 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9914 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9915 operands[3], operands[4]));
9917 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9918 operands[3], operands[4]));
9919 if (flags && !(ecx || xmm0))
9920 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9921 operands[2], operands[3],
9925 [(set_attr "type" "sselog")
9926 (set_attr "prefix_data16" "1")
9927 (set_attr "prefix_extra" "1")
9928 (set_attr "length_immediate" "1")
9929 (set_attr "memory" "none,load")
9930 (set_attr "mode" "TI")])
;; pcmpistri: SI result in operand 0 (constraint "c"), plus FLAGS_REG.
;; Operands 1 and 2 are the vectors (2 may be memory, second alternative);
;; operand 3 is the control immediate.
9932 (define_insn "sse4_2_pcmpistri"
9933 [(set (match_operand:SI 0 "register_operand" "=c,c")
9935 [(match_operand:V16QI 1 "register_operand" "x,x")
9936 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9937 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9939 (set (reg:CC FLAGS_REG)
9946 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
9947 [(set_attr "type" "sselog")
9948 (set_attr "prefix_data16" "1")
9949 (set_attr "prefix_extra" "1")
9950 (set_attr "length_immediate" "1")
9951 (set_attr "prefix" "maybe_vex")
9952 (set_attr "memory" "none,load")
9953 (set_attr "mode" "TI")])
;; pcmpistrm: V16QI result in operand 0 (constraint "Yz"), plus FLAGS_REG.
;; Operands 1 and 2 are the vectors (2 may be memory, second alternative);
;; operand 3 is the control immediate.
9955 (define_insn "sse4_2_pcmpistrm"
9956 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9958 [(match_operand:V16QI 1 "register_operand" "x,x")
9959 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9960 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9962 (set (reg:CC FLAGS_REG)
9969 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
9970 [(set_attr "type" "sselog")
9971 (set_attr "prefix_data16" "1")
9972 (set_attr "prefix_extra" "1")
9973 (set_attr "length_immediate" "1")
9974 (set_attr "prefix" "maybe_vex")
9975 (set_attr "memory" "none,load")
9976 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: only FLAGS_REG is set; operands 0 and 1 are
;; clobbered scratches.  Alternatives 0/1 clobber the "Yz" vector scratch
;; and emit pcmpistrm; alternatives 2/3 clobber the "c" integer scratch and
;; emit pcmpistri.  Within each pair the second alternative takes
;; operand 3 from memory (see the "memory" attribute).
9978 (define_insn "sse4_2_pcmpistr_cconly"
9979 [(set (reg:CC FLAGS_REG)
9981 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9982 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
9983 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
9985 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9986 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9989 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9990 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9991 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
9992 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
9993 [(set_attr "type" "sselog")
9994 (set_attr "prefix_data16" "1")
9995 (set_attr "prefix_extra" "1")
9996 (set_attr "length_immediate" "1")
9997 (set_attr "memory" "none,load,none,load")
9998 (set_attr "prefix" "maybe_vex")
9999 (set_attr "mode" "TI")])
10001 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10003 ;; XOP instructions
10005 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10007 ;; XOP parallel integer multiply/add instructions.
10008 ;; Note that the XOP multiply/add instructions
10009 ;; a[i] = b[i] * c[i] + d[i];
10010 ;; do not allow the value being added to be a memory operand.
;; vpmacsww on V8HI: operands 1 and 2 are the commutative factors ("%"),
;; of which only operand 2 may be memory; operand 3 is restricted to a
;; register by its "x" constraint.
10011 (define_insn "xop_pmacsww"
10012 [(set (match_operand:V8HI 0 "register_operand" "=x")
10015 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10016 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10017 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10019 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10020 [(set_attr "type" "ssemuladd")
10021 (set_attr "mode" "TI")])
;; vpmacssww on V8HI: a V8HI mult of operands 1 and 2 (commutative, only
;; operand 2 may be memory) combined with operand 3, which is restricted to
;; a register by its "x" constraint.
10023 (define_insn "xop_pmacssww"
10024 [(set (match_operand:V8HI 0 "register_operand" "=x")
10026 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10027 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10028 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10030 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10031 [(set_attr "type" "ssemuladd")
10032 (set_attr "mode" "TI")])
;; vpmacsdd on V4SI: operands 1 and 2 are the commutative factors ("%"),
;; of which only operand 2 may be memory; operand 3 is restricted to a
;; register by its "x" constraint.
10034 (define_insn "xop_pmacsdd"
10035 [(set (match_operand:V4SI 0 "register_operand" "=x")
10038 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10039 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10040 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10042 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10043 [(set_attr "type" "ssemuladd")
10044 (set_attr "mode" "TI")])
;; vpmacssdd on V4SI: a V4SI mult of operands 1 and 2 (commutative, only
;; operand 2 may be memory) combined with operand 3, which is restricted to
;; a register by its "x" constraint.
10046 (define_insn "xop_pmacssdd"
10047 [(set (match_operand:V4SI 0 "register_operand" "=x")
10049 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10050 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10051 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10053 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10054 [(set_attr "type" "ssemuladd")
10055 (set_attr "mode" "TI")])
;; vpmacssdql: V2DI result from selected elements of the V4SI operands 1
;; and 2 (both selectors start at index 1) and the V2DI operand 3, which is
;; restricted to a register.  Only operand 2 may be memory.
10057 (define_insn "xop_pmacssdql"
10058 [(set (match_operand:V2DI 0 "register_operand" "=x")
10063 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10064 (parallel [(const_int 1)
10067 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10068 (parallel [(const_int 1)
10070 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10072 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10073 [(set_attr "type" "ssemuladd")
10074 (set_attr "mode" "TI")])
;; vpmacssdqh: V2DI result from selected elements of the V4SI operands 1
;; and 2 (both selectors start at index 0) and the V2DI operand 3, which is
;; restricted to a register.  Only operand 2 may be memory.
10076 (define_insn "xop_pmacssdqh"
10077 [(set (match_operand:V2DI 0 "register_operand" "=x")
10082 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10083 (parallel [(const_int 0)
10087 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10088 (parallel [(const_int 0)
10090 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10092 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10093 [(set_attr "type" "ssemuladd")
10094 (set_attr "mode" "TI")])
;; vpmacsdql: V2DI result from selected elements of the V4SI operands 1
;; and 2 (both selectors start at index 1) and the V2DI operand 3, which is
;; restricted to a register.  Only operand 2 may be memory.
10096 (define_insn "xop_pmacsdql"
10097 [(set (match_operand:V2DI 0 "register_operand" "=x")
10102 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10103 (parallel [(const_int 1)
10107 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10108 (parallel [(const_int 1)
10110 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10112 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10113 [(set_attr "type" "ssemuladd")
10114 (set_attr "mode" "TI")])
10116 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10117 ;; fake it with a multiply/add. In general, we expect the define_split to
10118 ;; occur before register allocation, so we have to handle the corner case where
10119 ;; the target is the same as operands 1/2
;; Widening multiply of the elements at indices 1 and 3 of two V4SI
;; operands into a V2DI result.  The destination is earlyclobber ("=&x").
;; The pattern is split once reload has completed; the preparation code
;; sets operands[3] to the V2DI zero vector for use by the replacement
;; pattern.
;; NOTE(review): the split condition visible here is "&& reload_completed",
;; i.e. after register allocation -- confirm against the comment preceding
;; this pattern in the file.
10120 (define_insn_and_split "xop_mulv2div2di3_low"
10121 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10125 (match_operand:V4SI 1 "register_operand" "%x")
10126 (parallel [(const_int 1)
10130 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10131 (parallel [(const_int 1)
10132 (const_int 3)])))))]
10135 "&& reload_completed"
10136 [(set (match_dup 0)
10144 (parallel [(const_int 1)
10149 (parallel [(const_int 1)
10153 operands[3] = CONST0_RTX (V2DImode);
10155 [(set_attr "type" "ssemul")
10156 (set_attr "mode" "TI")])
;; vpmacsdqh: V2DI result from selected elements of the V4SI operands 1
;; and 2 (both selectors start at index 0) and the V2DI operand 3, which is
;; restricted to a register.  Only operand 2 may be memory.
10158 (define_insn "xop_pmacsdqh"
10159 [(set (match_operand:V2DI 0 "register_operand" "=x")
10164 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10165 (parallel [(const_int 0)
10169 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10170 (parallel [(const_int 0)
10172 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10174 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10175 [(set_attr "type" "ssemuladd")
10176 (set_attr "mode" "TI")])
10178 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10179 ;; fake it with a multiply/add. In general, we expect the define_split to
10180 ;; occur before register allocation, so we have to handle the corner case where
10181 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply of the elements at indices 0 and 2 of two V4SI
;; operands into a V2DI result.  The destination is earlyclobber ("=&x").
;; The pattern is split once reload has completed; the preparation code
;; sets operands[3] to the V2DI zero vector for use by the replacement
;; pattern.
;; NOTE(review): the split condition visible here is "&& reload_completed",
;; i.e. after register allocation -- confirm against the comment preceding
;; this pattern in the file.
10182 (define_insn_and_split "xop_mulv2div2di3_high"
10183 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10187 (match_operand:V4SI 1 "register_operand" "%x")
10188 (parallel [(const_int 0)
10192 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10193 (parallel [(const_int 0)
10194 (const_int 2)])))))]
10197 "&& reload_completed"
10198 [(set (match_dup 0)
10206 (parallel [(const_int 0)
10211 (parallel [(const_int 0)
10215 operands[3] = CONST0_RTX (V2DImode);
10217 [(set_attr "type" "ssemul")
10218 (set_attr "mode" "TI")])
10220 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: V4SI result from selected elements of the V8HI operands 1
;; and 2 (both selectors start at index 1) and the V4SI operand 3, which is
;; restricted to a register.  Only operand 2 may be memory.
10221 (define_insn "xop_pmacsswd"
10222 [(set (match_operand:V4SI 0 "register_operand" "=x")
10227 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10228 (parallel [(const_int 1)
10234 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10235 (parallel [(const_int 1)
10239 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10241 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10242 [(set_attr "type" "ssemuladd")
10243 (set_attr "mode" "TI")])
;; vpmacswd: V4SI result from selected elements of the V8HI operands 1
;; and 2 (both selectors start at index 1) and the V4SI operand 3, which is
;; restricted to a register.  Only operand 2 may be memory.
10245 (define_insn "xop_pmacswd"
10246 [(set (match_operand:V4SI 0 "register_operand" "=x")
10251 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10252 (parallel [(const_int 1)
10258 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10259 (parallel [(const_int 1)
10263 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10265 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10266 [(set_attr "type" "ssemuladd")
10267 (set_attr "mode" "TI")])
;; vpmadcsswd: V4SI result built from two element selections of the V8HI
;; operands 1 and 2 (selectors starting at index 0 and at index 1) and the
;; V4SI operand 3, which is restricted to a register.  Only operand 2 may
;; be memory.
10269 (define_insn "xop_pmadcsswd"
10270 [(set (match_operand:V4SI 0 "register_operand" "=x")
10276 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10277 (parallel [(const_int 0)
10283 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10284 (parallel [(const_int 0)
10292 (parallel [(const_int 1)
10299 (parallel [(const_int 1)
10302 (const_int 7)])))))
10303 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10305 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10306 [(set_attr "type" "ssemuladd")
10307 (set_attr "mode" "TI")])
;; vpmadcswd: V4SI result built from two element selections of the V8HI
;; operands 1 and 2 (selectors starting at index 0 and at index 1) and the
;; V4SI operand 3, which is restricted to a register.  Only operand 2 may
;; be memory.
10309 (define_insn "xop_pmadcswd"
10310 [(set (match_operand:V4SI 0 "register_operand" "=x")
10316 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10317 (parallel [(const_int 0)
10323 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10324 (parallel [(const_int 0)
10332 (parallel [(const_int 1)
10339 (parallel [(const_int 1)
10342 (const_int 7)])))))
10343 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10345 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10346 [(set_attr "type" "ssemuladd")
10347 (set_attr "mode" "TI")])
10349 ;; XOP parallel XMM conditional moves
;; 128-bit vpcmov for every SSEMODE: an if_then_else whose selector is
;; operand 3 and whose arms are operands 1 and 2.  The two alternatives
;; allow memory for either operand 3 or operand 2, never both; operand 1
;; is always a register.
10350 (define_insn "xop_pcmov_<mode>"
10351 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
10352 (if_then_else:SSEMODE
10353 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
10354 (match_operand:SSEMODE 1 "vector_move_operand" "x,x")
10355 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))]
10357 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10358 [(set_attr "type" "sse4arg")])
;; 256-bit vpcmov for every AVX256MODE: an if_then_else whose selector is
;; operand 3 and whose arms are operands 1 and 2.  The two alternatives
;; allow memory for either operand 3 or operand 2, never both; operand 1
;; is always a register.
10360 (define_insn "xop_pcmov_<mode>256"
10361 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
10362 (if_then_else:AVX256MODE
10363 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
10364 (match_operand:AVX256MODE 1 "vector_move_operand" "x,x")
10365 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))]
10367 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10368 [(set_attr "type" "sse4arg")])
10370 ;; XOP horizontal add/subtract instructions
;; vphaddbw: V8HI result from the single V16QI operand 1 (register or
;; memory), using two element selections that start at indices 0 and 1.
10371 (define_insn "xop_phaddbw"
10372 [(set (match_operand:V8HI 0 "register_operand" "=x")
10376 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10377 (parallel [(const_int 0)
10388 (parallel [(const_int 1)
10395 (const_int 15)])))))]
10397 "vphaddbw\t{%1, %0|%0, %1}"
10398 [(set_attr "type" "sseiadd1")])
;; vphaddbd: V4SI result from the single V16QI operand 1 (register or
;; memory), using four element selections that start at indices 0, 1, 2
;; and 3.
10400 (define_insn "xop_phaddbd"
10401 [(set (match_operand:V4SI 0 "register_operand" "=x")
10406 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10407 (parallel [(const_int 0)
10414 (parallel [(const_int 1)
10417 (const_int 13)]))))
10422 (parallel [(const_int 2)
10429 (parallel [(const_int 3)
10432 (const_int 15)]))))))]
10434 "vphaddbd\t{%1, %0|%0, %1}"
10435 [(set_attr "type" "sseiadd1")])
;; vphaddbq: V2DI result from the single V16QI operand 1 (register or
;; memory), using eight element selections that start at indices 0-3 and
;; 8-11.
10437 (define_insn "xop_phaddbq"
10438 [(set (match_operand:V2DI 0 "register_operand" "=x")
10444 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10445 (parallel [(const_int 0)
10450 (parallel [(const_int 1)
10456 (parallel [(const_int 2)
10461 (parallel [(const_int 3)
10462 (const_int 7)])))))
10468 (parallel [(const_int 8)
10473 (parallel [(const_int 9)
10474 (const_int 13)]))))
10479 (parallel [(const_int 10)
10484 (parallel [(const_int 11)
10485 (const_int 15)])))))))]
10487 "vphaddbq\t{%1, %0|%0, %1}"
10488 [(set_attr "type" "sseiadd1")])
;; vphaddwd: V4SI result from the single V8HI operand 1 (register or
;; memory), using two element selections that start at indices 0 and 1.
10490 (define_insn "xop_phaddwd"
10491 [(set (match_operand:V4SI 0 "register_operand" "=x")
10495 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10496 (parallel [(const_int 0)
10503 (parallel [(const_int 1)
10506 (const_int 7)])))))]
10508 "vphaddwd\t{%1, %0|%0, %1}"
10509 [(set_attr "type" "sseiadd1")])
;; vphaddwq: V2DI result from the single V8HI operand 1 (register or
;; memory), using four element selections that start at indices 0, 1, 2
;; and 3.
10511 (define_insn "xop_phaddwq"
10512 [(set (match_operand:V2DI 0 "register_operand" "=x")
10517 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10518 (parallel [(const_int 0)
10523 (parallel [(const_int 1)
10529 (parallel [(const_int 2)
10534 (parallel [(const_int 3)
10535 (const_int 7)]))))))]
10537 "vphaddwq\t{%1, %0|%0, %1}"
10538 [(set_attr "type" "sseiadd1")])
;; vphadddq: V2DI result from the single V4SI operand 1 (register or
;; memory), using two element selections that start at indices 0 and 1.
10540 (define_insn "xop_phadddq"
10541 [(set (match_operand:V2DI 0 "register_operand" "=x")
10545 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10546 (parallel [(const_int 0)
10551 (parallel [(const_int 1)
10552 (const_int 3)])))))]
10554 "vphadddq\t{%1, %0|%0, %1}"
10555 [(set_attr "type" "sseiadd1")])
;; vphaddubw: V8HI result from the single V16QI operand 1 (register or
;; memory), using two element selections that start at indices 0 and 1.
10557 (define_insn "xop_phaddubw"
10558 [(set (match_operand:V8HI 0 "register_operand" "=x")
10562 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10563 (parallel [(const_int 0)
10574 (parallel [(const_int 1)
10581 (const_int 15)])))))]
10583 "vphaddubw\t{%1, %0|%0, %1}"
10584 [(set_attr "type" "sseiadd1")])
;; vphaddubd: V4SI result from the single V16QI operand 1 (register or
;; memory), using four element selections that start at indices 0, 1, 2
;; and 3.
10586 (define_insn "xop_phaddubd"
10587 [(set (match_operand:V4SI 0 "register_operand" "=x")
10592 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10593 (parallel [(const_int 0)
10600 (parallel [(const_int 1)
10603 (const_int 13)]))))
10608 (parallel [(const_int 2)
10615 (parallel [(const_int 3)
10618 (const_int 15)]))))))]
10620 "vphaddubd\t{%1, %0|%0, %1}"
10621 [(set_attr "type" "sseiadd1")])
;; vphaddubq: V2DI result from the single V16QI operand 1 (register or
;; memory), using eight element selections that start at indices 0-3 and
;; 8-11.
10623 (define_insn "xop_phaddubq"
10624 [(set (match_operand:V2DI 0 "register_operand" "=x")
10630 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10631 (parallel [(const_int 0)
10636 (parallel [(const_int 1)
10642 (parallel [(const_int 2)
10647 (parallel [(const_int 3)
10648 (const_int 7)])))))
10654 (parallel [(const_int 8)
10659 (parallel [(const_int 9)
10660 (const_int 13)]))))
10665 (parallel [(const_int 10)
10670 (parallel [(const_int 11)
10671 (const_int 15)])))))))]
10673 "vphaddubq\t{%1, %0|%0, %1}"
10674 [(set_attr "type" "sseiadd1")])
;; vphadduwd: V4SI result from the single V8HI operand 1 (register or
;; memory), using two element selections that start at indices 0 and 1.
10676 (define_insn "xop_phadduwd"
10677 [(set (match_operand:V4SI 0 "register_operand" "=x")
10681 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10682 (parallel [(const_int 0)
10689 (parallel [(const_int 1)
10692 (const_int 7)])))))]
10694 "vphadduwd\t{%1, %0|%0, %1}"
10695 [(set_attr "type" "sseiadd1")])
;; vphadduwq: V2DI result from the single V8HI operand 1 (register or
;; memory), using four element selections that start at indices 0, 1, 2
;; and 3.
10697 (define_insn "xop_phadduwq"
10698 [(set (match_operand:V2DI 0 "register_operand" "=x")
10703 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10704 (parallel [(const_int 0)
10709 (parallel [(const_int 1)
10715 (parallel [(const_int 2)
10720 (parallel [(const_int 3)
10721 (const_int 7)]))))))]
10723 "vphadduwq\t{%1, %0|%0, %1}"
10724 [(set_attr "type" "sseiadd1")])
;; vphaddudq: V2DI result from the single V4SI operand 1 (register or
;; memory), using two element selections that start at indices 0 and 1.
10726 (define_insn "xop_phaddudq"
10727 [(set (match_operand:V2DI 0 "register_operand" "=x")
10731 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10732 (parallel [(const_int 0)
10737 (parallel [(const_int 1)
10738 (const_int 3)])))))]
10740 "vphaddudq\t{%1, %0|%0, %1}"
10741 [(set_attr "type" "sseiadd1")])
;; vphsubbw: V8HI result from the single V16QI operand 1 (register or
;; memory), using two element selections that start at indices 0 and 1.
10743 (define_insn "xop_phsubbw"
10744 [(set (match_operand:V8HI 0 "register_operand" "=x")
10748 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10749 (parallel [(const_int 0)
10760 (parallel [(const_int 1)
10767 (const_int 15)])))))]
10769 "vphsubbw\t{%1, %0|%0, %1}"
10770 [(set_attr "type" "sseiadd1")])
;; vphsubwd: V4SI result from the single V8HI operand 1 (register or
;; memory), using two element selections that start at indices 0 and 1.
10772 (define_insn "xop_phsubwd"
10773 [(set (match_operand:V4SI 0 "register_operand" "=x")
10777 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10778 (parallel [(const_int 0)
10785 (parallel [(const_int 1)
10788 (const_int 7)])))))]
10790 "vphsubwd\t{%1, %0|%0, %1}"
10791 [(set_attr "type" "sseiadd1")])
;; vphsubdq: V2DI result from the single V4SI operand 1 (register or
;; memory), using two element selections that start at indices 0 and 1.
10793 (define_insn "xop_phsubdq"
10794 [(set (match_operand:V2DI 0 "register_operand" "=x")
10798 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10799 (parallel [(const_int 0)
10804 (parallel [(const_int 1)
10805 (const_int 3)])))))]
10807 "vphsubdq\t{%1, %0|%0, %1}"
10808 [(set_attr "type" "sseiadd1")])
10810 ;; XOP permute instructions
;; vpperm, modelled as UNSPEC_XOP_PERMUTE over three V16QI operands.
;; Operand 1 is always a register.  Either operand 2 or operand 3 may be
;; memory, but the insn condition rejects both being memory at once.
10811 (define_insn "xop_pperm"
10812 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10814 [(match_operand:V16QI 1 "register_operand" "x,x")
10815 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10816 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
10817 UNSPEC_XOP_PERMUTE))]
10818 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10819 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10820 [(set_attr "type" "sse4arg")
10821 (set_attr "mode" "TI")])
10823 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI operands (1 and 2) into a V4SI result with vpperm.
;; Operand 3 is the V16QI vpperm selector, attached with (use ...).
;; The insn condition rejects operands 2 and 3 both being memory.
10824 (define_insn "xop_pperm_pack_v2di_v4si"
10825 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10828 (match_operand:V2DI 1 "register_operand" "x,x"))
10830 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
10831 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10832 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10833 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10834 [(set_attr "type" "sse4arg")
10835 (set_attr "mode" "TI")])
;; Pack two V4SI operands (1 and 2) into a V8HI result with vpperm.
;; Operand 3 is the V16QI vpperm selector, attached with (use ...).
;; The insn condition rejects operands 2 and 3 both being memory.
10837 (define_insn "xop_pperm_pack_v4si_v8hi"
10838 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10841 (match_operand:V4SI 1 "register_operand" "x,x"))
10843 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
10844 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10845 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10846 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10847 [(set_attr "type" "sse4arg")
10848 (set_attr "mode" "TI")])
;; Pack two V8HI operands (1 and 2) into a V16QI result with vpperm.
;; Operand 3 is the V16QI vpperm selector, attached with (use ...).
;; The insn condition rejects operands 2 and 3 both being memory.
10850 (define_insn "xop_pperm_pack_v8hi_v16qi"
10851 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10854 (match_operand:V8HI 1 "register_operand" "x,x"))
10856 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
10857 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10858 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10859 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10860 [(set_attr "type" "sse4arg")
10861 (set_attr "mode" "TI")])
10863 ;; XOP packed rotate instructions
;; Expander for vector rotate-left by the SImode count in operand 2.
;; When operand 2 is not an immediate accepted by
;; const_0_to_<sserotatemax>_operand, the count is first converted to the
;; element mode if necessary, then broadcast into a fresh vector register
;; with vec_init (every one of the <ssescalarnum> elements is the same
;; count).  The rotate is then emitted through xop_vrotl<mode>3.
10864 (define_expand "rotl<mode>3"
10865 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10866 (rotate:SSEMODE1248
10867 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10868 (match_operand:SI 2 "general_operand")))]
10871 /* If we were given a scalar, convert it to parallel */
10872 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10874 rtvec vs = rtvec_alloc (<ssescalarnum>);
10875 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10876 rtx reg = gen_reg_rtx (<MODE>mode);
10877 rtx op2 = operands[2];
10880 if (GET_MODE (op2) != <ssescalarmode>mode)
10882 op2 = gen_reg_rtx (<ssescalarmode>mode);
10883 convert_move (op2, operands[2], false);
10886 for (i = 0; i < <ssescalarnum>; i++)
10887 RTVEC_ELT (vs, i) = op2;
10889 emit_insn (gen_vec_init<mode> (reg, par));
10890 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for vector rotate-right by the SImode count in operand 2.
;; When operand 2 is not an immediate accepted by
;; const_0_to_<sserotatemax>_operand, the count is first converted to the
;; element mode if necessary, then broadcast into a vector register with
;; vec_init.  That vector is negated, and the rotate is emitted as
;; xop_vrotl<mode>3 with the negated counts.
10895 (define_expand "rotr<mode>3"
10896 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10897 (rotatert:SSEMODE1248
10898 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10899 (match_operand:SI 2 "general_operand")))]
10902 /* If we were given a scalar, convert it to parallel */
10903 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10905 rtvec vs = rtvec_alloc (<ssescalarnum>);
10906 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10907 rtx neg = gen_reg_rtx (<MODE>mode);
10908 rtx reg = gen_reg_rtx (<MODE>mode);
10909 rtx op2 = operands[2];
10912 if (GET_MODE (op2) != <ssescalarmode>mode)
10914 op2 = gen_reg_rtx (<ssescalarmode>mode);
10915 convert_move (op2, operands[2], false);
10918 for (i = 0; i < <ssescalarnum>; i++)
10919 RTVEC_ELT (vs, i) = op2;
10921 emit_insn (gen_vec_init<mode> (reg, par));
10922 emit_insn (gen_neg<mode>2 (neg, reg));
10923 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; XOP rotate-left by an immediate count: vprot<ssevecsize> with operand 1
;; (register or memory) and operand 2, an immediate accepted by
;; const_0_to_<sserotatemax>_operand.
10928 (define_insn "xop_rotl<mode>3"
10929 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10930 (rotate:SSEMODE1248
10931 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
10932 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10934 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10935 [(set_attr "type" "sseishft")
10936 (set_attr "length_immediate" "1")
10937 (set_attr "mode" "TI")])
;; XOP rotate-right by an immediate count.  The output code prints vprot
;; with the complementary count in operand 3, i.e.
;; (element width in bits) - operand 2.
;; The width must be that of a single element, taken from
;; <ssescalarmode>.  <ssescalarnum> * 8 is not the element width:
;; <ssescalarnum> is the number of elements in the vector, so that
;; expression gives 128/64/32/16 for V16QI/V8HI/V4SI/V2DI and is only
;; right for V4SI.
10939 (define_insn "xop_rotr<mode>3"
10940 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10941 (rotatert:SSEMODE1248
10942 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
10943 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10946 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
10947 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
10949 [(set_attr "type" "sseishft")
10950 (set_attr "length_immediate" "1")
10951 (set_attr "mode" "TI")])
;; Expander for rotate-right with a per-element vector count (operand 2).
;; The counts are negated into a fresh register and the rotate is emitted
;; as xop_vrotl<mode>3 with the negated counts.
10953 (define_expand "vrotr<mode>3"
10954 [(match_operand:SSEMODE1248 0 "register_operand" "")
10955 (match_operand:SSEMODE1248 1 "register_operand" "")
10956 (match_operand:SSEMODE1248 2 "register_operand" "")]
10959 rtx reg = gen_reg_rtx (<MODE>mode);
10960 emit_insn (gen_neg<mode>2 (reg, operands[2]));
10961 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for rotate-left with a per-element vector count (operand 2);
;; forwards its three operands unchanged to xop_vrotl<mode>3.
10965 (define_expand "vrotl<mode>3"
10966 [(match_operand:SSEMODE1248 0 "register_operand" "")
10967 (match_operand:SSEMODE1248 1 "register_operand" "")
10968 (match_operand:SSEMODE1248 2 "register_operand" "")]
10971 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; vprot<ssevecsize> with a vector count in operand 2.  The RTL is an
;; if_then_else with a rotate arm and a rotatert arm, the latter using the
;; negation of operand 2.
;; NOTE(review): the selecting condition is on a line not visible here;
;; presumably it tests the sign of the count -- confirm.
;; Either operand 1 or operand 2 may be memory, but the insn condition
;; rejects both being memory at once.
10975 (define_insn "xop_vrotl<mode>3"
10976 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
10977 (if_then_else:SSEMODE1248
10979 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
10981 (rotate:SSEMODE1248
10982 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
10984 (rotatert:SSEMODE1248
10986 (neg:SSEMODE1248 (match_dup 2)))))]
10987 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10988 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10989 [(set_attr "type" "sseishft")
10990 (set_attr "prefix_data16" "0")
10991 (set_attr "prefix_extra" "2")
10992 (set_attr "mode" "TI")])
10994 ;; XOP packed shift instructions.
10995 ;; FIXME: add V2DI back in
;; Expander for logical shift right with a per-element vector count
;; (operand 2), for the SSEMODE124 modes.  The counts are negated into a
;; fresh register and the shift is emitted as xop_lshl<mode>3 with the
;; negated counts.
10996 (define_expand "vlshr<mode>3"
10997 [(match_operand:SSEMODE124 0 "register_operand" "")
10998 (match_operand:SSEMODE124 1 "register_operand" "")
10999 (match_operand:SSEMODE124 2 "register_operand" "")]
11002 rtx neg = gen_reg_rtx (<MODE>mode);
11003 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11004 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Expander for arithmetic shift right with a per-element vector count
;; (operand 2), for the SSEMODE124 modes.  The counts are negated into a
;; fresh register and the shift is emitted as xop_ashl<mode>3 with the
;; negated counts.
11008 (define_expand "vashr<mode>3"
11009 [(match_operand:SSEMODE124 0 "register_operand" "")
11010 (match_operand:SSEMODE124 1 "register_operand" "")
11011 (match_operand:SSEMODE124 2 "register_operand" "")]
11014 rtx neg = gen_reg_rtx (<MODE>mode);
11015 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11016 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Expander for a per-element variable shift left on SSEMODE124 vectors.
;; All three operands are passed unchanged to xop_ashl<mode>3.
11020 (define_expand "vashl<mode>3"
11021 [(match_operand:SSEMODE124 0 "register_operand" "")
11022 (match_operand:SSEMODE124 1 "register_operand" "")
11023 (match_operand:SSEMODE124 2 "register_operand" "")]
11026 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; XOP variable arithmetic shift, emitted as vpsha<ssevecsize>.  The source
;; is an if_then_else keyed on operand 2 (the per-element count): one arm
;; is an ashift of operand 1, the other an ashiftrt by the negated count.
;; The insn condition rejects operands 1 and 2 both being in memory.
11030 (define_insn "xop_ashl<mode>3"
11031 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11032 (if_then_else:SSEMODE1248
11034 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11036 (ashift:SSEMODE1248
11037 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11039 (ashiftrt:SSEMODE1248
11041 (neg:SSEMODE1248 (match_dup 2)))))]
11042 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11043 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11044 [(set_attr "type" "sseishft")
11045 (set_attr "prefix_data16" "0")
11046 (set_attr "prefix_extra" "2")
11047 (set_attr "mode" "TI")])
;; XOP variable logical shift, emitted as vpshl<ssevecsize>.  Same shape as
;; xop_ashl<mode>3, except that the second arm is an lshiftrt (logical
;; right shift) by the negated count instead of an ashiftrt.
;; The insn condition rejects operands 1 and 2 both being in memory.
11049 (define_insn "xop_lshl<mode>3"
11050 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11051 (if_then_else:SSEMODE1248
11053 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11055 (ashift:SSEMODE1248
11056 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11058 (lshiftrt:SSEMODE1248
11060 (neg:SSEMODE1248 (match_dup 2)))))]
11061 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11062 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11063 [(set_attr "type" "sseishft")
11064 (set_attr "prefix_data16" "0")
11065 (set_attr "prefix_extra" "2")
11066 (set_attr "mode" "TI")])
11068 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift left by a scalar SImode count (register or constant).
;; The count is broadcast: a 16-element PARALLEL whose every element is
;; operand 2 initializes a fresh V16QI register via vec_initv16qi, and
;; that register is used as the count vector for xop_ashlv16qi3.
11069 (define_expand "ashlv16qi3"
11070 [(match_operand:V16QI 0 "register_operand" "")
11071 (match_operand:V16QI 1 "register_operand" "")
11072 (match_operand:SI 2 "nonmemory_operand" "")]
11075 rtvec vs = rtvec_alloc (16);
11076 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11077 rtx reg = gen_reg_rtx (V16QImode);
11079 for (i = 0; i < 16; i++)
11080 RTVEC_ELT (vs, i) = operands[2];
11082 emit_insn (gen_vec_initv16qi (reg, par));
11083 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI shift by a scalar SImode count, routed to the logical-shift insn.
;; Operand 2 is broadcast into a V16QI register through vec_initv16qi
;; (all 16 PARALLEL elements are operand 2) and passed as the count vector
;; to xop_lshlv16qi3.
11087 (define_expand "lshlv16qi3"
11088 [(match_operand:V16QI 0 "register_operand" "")
11089 (match_operand:V16QI 1 "register_operand" "")
11090 (match_operand:SI 2 "nonmemory_operand" "")]
11093 rtvec vs = rtvec_alloc (16);
11094 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11095 rtx reg = gen_reg_rtx (V16QImode);
11097 for (i = 0; i < 16; i++)
11098 RTVEC_ELT (vs, i) = operands[2];
11100 emit_insn (gen_vec_initv16qi (reg, par));
11101 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar SImode count, implemented as
;; xop_ashlv16qi3 with a negated count vector.
;; When operand 2 is a CONST_INT, the negated constant is broadcast
;; directly.  When it is not, the broadcast vector is negated with
;; negv16qi2 before use.
11105 (define_expand "ashrv16qi3"
11106 [(match_operand:V16QI 0 "register_operand" "")
11107 (match_operand:V16QI 1 "register_operand" "")
11108 (match_operand:SI 2 "nonmemory_operand" "")]
11111 rtvec vs = rtvec_alloc (16);
11112 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11113 rtx reg = gen_reg_rtx (V16QImode);
11115 rtx ele = ((CONST_INT_P (operands[2]))
11116 ? GEN_INT (- INTVAL (operands[2]))
11119 for (i = 0; i < 16; i++)
11120 RTVEC_ELT (vs, i) = ele;
11122 emit_insn (gen_vec_initv16qi (reg, par));
11124 if (!CONST_INT_P (operands[2]))
11126 rtx neg = gen_reg_rtx (V16QImode);
11127 emit_insn (gen_negv16qi2 (neg, reg));
11128 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11131 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar count, implemented as
;; xop_ashlv2di3 with a negated count in both elements.
;; The negated scalar "ele" is obtained in one of three ways:
;;   - operand 2 is a CONST_INT: the constant is negated at expand time;
;;   - operand 2 is not in DImode: it is first converted to DImode with
;;     convert_move, then negated with negdi2;
;;   - otherwise: it is negated with negdi2 directly.
;; "ele" is then placed in both lanes through vec_initv2di.
11136 (define_expand "ashrv2di3"
11137 [(match_operand:V2DI 0 "register_operand" "")
11138 (match_operand:V2DI 1 "register_operand" "")
11139 (match_operand:DI 2 "nonmemory_operand" "")]
11142 rtvec vs = rtvec_alloc (2);
11143 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11144 rtx reg = gen_reg_rtx (V2DImode);
11147 if (CONST_INT_P (operands[2]))
11148 ele = GEN_INT (- INTVAL (operands[2]));
11149 else if (GET_MODE (operands[2]) != DImode)
11151 rtx move = gen_reg_rtx (DImode);
11152 ele = gen_reg_rtx (DImode);
11153 convert_move (move, operands[2], false);
11154 emit_insn (gen_negdi2 (ele, move));
11158 ele = gen_reg_rtx (DImode);
11159 emit_insn (gen_negdi2 (ele, operands[2]));
11162 RTVEC_ELT (vs, 0) = ele;
11163 RTVEC_ELT (vs, 1) = ele;
11164 emit_insn (gen_vec_initv2di (reg, par));
11165 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11169 ;; XOP FRCZ support
;; Packed form: emits vfrcz<ssemodesuffix> from operand 1 (register or
;; memory) into register operand 0, for the SSEMODEF2P modes.
11171 (define_insn "xop_frcz<mode>2"
11172 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11174 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11177 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11178 [(set_attr "type" "ssecvt1")
11179 (set_attr "mode" "<MODE>")])
;; Scalar form: emits vfrcz<ssescalarmodesuffix> on operand 2.  The result
;; is expressed as a vec_merge with operand 1, which the "0" constraint
;; ties to the destination register; only operands 0 and 2 appear in the
;; output template.
11182 (define_insn "xop_vmfrcz<mode>2"
11183 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11184 (vec_merge:SSEMODEF2P
11186 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11188 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11191 "vfrcz<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
11192 [(set_attr "type" "ssecvt1")
11193 (set_attr "mode" "<MODE>")])
;; Same output template as xop_frcz<mode>2, but iterating over the
;; FMA4MODEF4 modes.
;; NOTE(review): going by the "256" in the pattern name these are
;; presumably the 256-bit float vector modes -- confirm against the
;; FMA4MODEF4 iterator definition.
11195 (define_insn "xop_frcz<mode>2256"
11196 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x")
11198 [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")]
11201 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11202 [(set_attr "type" "ssecvt1")
11203 (set_attr "mode" "<MODE>")])
;; XOP integer vector compare, emitted as vpcom<cond><ssevecsize>.
;; Operator 1 must satisfy ix86_comparison_int_operator; it compares
;; register operand 2 with operand 3 (register or memory).
;; NOTE(review): the %Y1 modifier presumably prints the condition name
;; derived from operator 1 -- confirm in the target's print_operand.
11205 (define_insn "xop_maskcmp<mode>3"
11206 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11207 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11208 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11209 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11211 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11212 [(set_attr "type" "sse4arg")
11213 (set_attr "prefix_data16" "0")
11214 (set_attr "prefix_rep" "0")
11215 (set_attr "prefix_extra" "2")
11216 (set_attr "length_immediate" "1")
11217 (set_attr "mode" "TI")])
;; Unsigned counterpart of the XOP vector compare: operator 1 must satisfy
;; ix86_comparison_uns_operator and the mnemonic carries a "u" before the
;; element-size suffix (vpcom<cond>u<ssevecsize>).
11219 (define_insn "xop_maskcmp_uns<mode>3"
11220 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11221 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11222 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11223 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11225 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11226 [(set_attr "type" "ssecmp")
11227 (set_attr "prefix_data16" "0")
11228 (set_attr "prefix_rep" "0")
11229 (set_attr "prefix_extra" "2")
11230 (set_attr "length_immediate" "1")
11231 (set_attr "mode" "TI")])
11233 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11234 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11235 ;; the exact instruction generated for the intrinsic.
;; The comparison is wrapped in an unspec (UNSPEC_XOP_UNSIGNED_CMP), so it
;; is not exposed as a plain comparison rtx; the output template is the
;; same as xop_maskcmp_uns<mode>3.
11236 (define_insn "xop_maskcmp_uns2<mode>3"
11237 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11238 (unspec:SSEMODE1248
11239 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11240 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11241 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11242 UNSPEC_XOP_UNSIGNED_CMP))]
11244 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11245 [(set_attr "type" "ssecmp")
11246 (set_attr "prefix_data16" "0")
11247 (set_attr "prefix_extra" "2")
11248 (set_attr "length_immediate" "1")
11249 (set_attr "mode" "TI")])
11251 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11252 ;; being added here to be complete.
;; Operand 3 is a compile-time integer selector: a nonzero value emits
;; vpcomtrue<ssevecsize>, zero emits vpcomfalse<ssevecsize>.  Operands 1
;; and 2 are the two vector inputs named in the output template.
11253 (define_insn "xop_pcom_tf<mode>3"
11254 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11255 (unspec:SSEMODE1248
11256 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11257 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11258 (match_operand:SI 3 "const_int_operand" "n")]
11259 UNSPEC_XOP_TRUEFALSE))]
11262 return ((INTVAL (operands[3]) != 0)
11263 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11264 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11266 [(set_attr "type" "ssecmp")
11267 (set_attr "prefix_data16" "0")
11268 (set_attr "prefix_extra" "2")
11269 (set_attr "length_immediate" "1")
11270 (set_attr "mode" "TI")])
;; XOP two-source permute, emitted as vpermil2<ssemodesuffix>.
;; Operands 1 and 2 are the data sources, operand 3 is a selector vector
;; in <avxpermvecmode> (register or memory), and operand 4 is an immediate
;; restricted to the range 0..3.
11272 (define_insn "xop_vpermil2<mode>3"
11273 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11275 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11276 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "%x")
11277 (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm")
11278 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11281 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11282 [(set_attr "type" "sse4arg")
11283 (set_attr "length_immediate" "1")
11284 (set_attr "mode" "<MODE>")])
11286 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded AES encrypt round (vaesenc), enabled when both TARGET_AES
;; and TARGET_AVX hold.  Three-operand form: the destination is
;; independent of source operand 1.
11287 (define_insn "*avx_aesenc"
11288 [(set (match_operand:V2DI 0 "register_operand" "=x")
11289 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11290 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11292 "TARGET_AES && TARGET_AVX"
11293 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11294 [(set_attr "type" "sselog1")
11295 (set_attr "prefix_extra" "1")
11296 (set_attr "prefix" "vex")
11297 (set_attr "mode" "TI")])
;; Two-operand aesenc: operand 1 is tied to the destination by the "0"
;; constraint, so the template names only operands 0 and 2.
11299 (define_insn "aesenc"
11300 [(set (match_operand:V2DI 0 "register_operand" "=x")
11301 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11302 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11305 "aesenc\t{%2, %0|%0, %2}"
11306 [(set_attr "type" "sselog1")
11307 (set_attr "prefix_extra" "1")
11308 (set_attr "mode" "TI")])
;; VEX-encoded vaesenclast (UNSPEC_AESENCLAST), enabled when both
;; TARGET_AES and TARGET_AVX hold.  Three-operand form.
11310 (define_insn "*avx_aesenclast"
11311 [(set (match_operand:V2DI 0 "register_operand" "=x")
11312 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11313 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11314 UNSPEC_AESENCLAST))]
11315 "TARGET_AES && TARGET_AVX"
11316 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11317 [(set_attr "type" "sselog1")
11318 (set_attr "prefix_extra" "1")
11319 (set_attr "prefix" "vex")
11320 (set_attr "mode" "TI")])
;; Two-operand aesenclast (UNSPEC_AESENCLAST): operand 1 is tied to the
;; destination by the "0" constraint.
11322 (define_insn "aesenclast"
11323 [(set (match_operand:V2DI 0 "register_operand" "=x")
11324 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11325 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11326 UNSPEC_AESENCLAST))]
11328 "aesenclast\t{%2, %0|%0, %2}"
11329 [(set_attr "type" "sselog1")
11330 (set_attr "prefix_extra" "1")
11331 (set_attr "mode" "TI")])
;; VEX-encoded AES decrypt round (vaesdec), enabled when both TARGET_AES
;; and TARGET_AVX hold.  Three-operand form.
11333 (define_insn "*avx_aesdec"
11334 [(set (match_operand:V2DI 0 "register_operand" "=x")
11335 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11336 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11338 "TARGET_AES && TARGET_AVX"
11339 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11340 [(set_attr "type" "sselog1")
11341 (set_attr "prefix_extra" "1")
11342 (set_attr "prefix" "vex")
11343 (set_attr "mode" "TI")])
;; Two-operand aesdec: operand 1 is tied to the destination by the "0"
;; constraint, so the template names only operands 0 and 2.
11345 (define_insn "aesdec"
11346 [(set (match_operand:V2DI 0 "register_operand" "=x")
11347 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11348 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11351 "aesdec\t{%2, %0|%0, %2}"
11352 [(set_attr "type" "sselog1")
11353 (set_attr "prefix_extra" "1")
11354 (set_attr "mode" "TI")])
;; VEX-encoded vaesdeclast (UNSPEC_AESDECLAST), enabled when both
;; TARGET_AES and TARGET_AVX hold.  Three-operand form.
11356 (define_insn "*avx_aesdeclast"
11357 [(set (match_operand:V2DI 0 "register_operand" "=x")
11358 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11359 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11360 UNSPEC_AESDECLAST))]
11361 "TARGET_AES && TARGET_AVX"
11362 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11363 [(set_attr "type" "sselog1")
11364 (set_attr "prefix_extra" "1")
11365 (set_attr "prefix" "vex")
11366 (set_attr "mode" "TI")])
;; Two-operand aesdeclast (UNSPEC_AESDECLAST): operand 1 is tied to the
;; destination by the "0" constraint.
11368 (define_insn "aesdeclast"
11369 [(set (match_operand:V2DI 0 "register_operand" "=x")
11370 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11371 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11372 UNSPEC_AESDECLAST))]
11374 "aesdeclast\t{%2, %0|%0, %2}"
11375 [(set_attr "type" "sselog1")
11376 (set_attr "prefix_extra" "1")
11377 (set_attr "mode" "TI")])
;; aesimc with a single source (register or memory) and an independent
;; destination.  The "prefix" attribute is "maybe_vex" and the template
;; begins with %v.
;; NOTE(review): %v presumably selects the "v"-prefixed mnemonic when the
;; VEX encoding is in use, so this one pattern covers both encodings --
;; confirm in the target's print_operand.
11379 (define_insn "aesimc"
11380 [(set (match_operand:V2DI 0 "register_operand" "=x")
11381 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11384 "%vaesimc\t{%1, %0|%0, %1}"
11385 [(set_attr "type" "sselog1")
11386 (set_attr "prefix_extra" "1")
11387 (set_attr "prefix" "maybe_vex")
11388 (set_attr "mode" "TI")])
;; aeskeygenassist (UNSPEC_AESKEYGENASSIST): operand 1 is the source
;; (register or memory) and operand 2 an 8-bit immediate (0..255).
;; Like aesimc, the template begins with %v and "prefix" is "maybe_vex".
11390 (define_insn "aeskeygenassist"
11391 [(set (match_operand:V2DI 0 "register_operand" "=x")
11392 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11393 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11394 UNSPEC_AESKEYGENASSIST))]
11396 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11397 [(set_attr "type" "sselog1")
11398 (set_attr "prefix_extra" "1")
11399 (set_attr "length_immediate" "1")
11400 (set_attr "prefix" "maybe_vex")
11401 (set_attr "mode" "TI")])
;; VEX-encoded vpclmulqdq, enabled when both TARGET_PCLMUL and TARGET_AVX
;; hold.  Operands 1 and 2 are the V2DI sources and operand 3 is an 8-bit
;; immediate (0..255).
11403 (define_insn "*vpclmulqdq"
11404 [(set (match_operand:V2DI 0 "register_operand" "=x")
11405 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11406 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11407 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11409 "TARGET_PCLMUL && TARGET_AVX"
11410 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11411 [(set_attr "type" "sselog1")
11412 (set_attr "prefix_extra" "1")
11413 (set_attr "length_immediate" "1")
11414 (set_attr "prefix" "vex")
11415 (set_attr "mode" "TI")])
;; Two-operand pclmulqdq: operand 1 is tied to the destination by the "0"
;; constraint; operand 3 is an 8-bit immediate (0..255).
11417 (define_insn "pclmulqdq"
11418 [(set (match_operand:V2DI 0 "register_operand" "=x")
11419 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11420 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11421 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11424 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11425 [(set_attr "type" "sselog1")
11426 (set_attr "prefix_extra" "1")
11427 (set_attr "length_immediate" "1")
11428 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL for vzeroall by hand.
;; Element 0 is an UNSPEC_VOLATILE marker; elements 1..nregs are SETs of
;; each SSE register, viewed in V8SImode, to the V8SI zero constant.
;; nregs is 16 when TARGET_64BIT and 8 otherwise.
11430 (define_expand "avx_vzeroall"
11431 [(match_par_dup 0 [(const_int 0)])]
11434 int nregs = TARGET_64BIT ? 16 : 8;
11437 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11439 XVECEXP (operands[0], 0, 0)
11440 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11443 for (regno = 0; regno < nregs; regno++)
11444 XVECEXP (operands[0], 0, regno + 1)
11445 = gen_rtx_SET (VOIDmode,
11446 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11447 CONST0_RTX (V8SImode));
;; Matcher for the vzeroall PARALLEL: the whole parallel must satisfy
;; vzeroall_operation and start with the UNSPECV_VZEROALL marker.
;; The attributes mark it as having no ModRM byte and no memory access.
11450 (define_insn "*avx_vzeroall"
11451 [(match_parallel 0 "vzeroall_operation"
11452 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11455 [(set_attr "type" "sse")
11456 (set_attr "modrm" "0")
11457 (set_attr "memory" "none")
11458 (set_attr "prefix" "vex")
11459 (set_attr "mode" "OI")])
11461 ;; vzeroupper clobbers the upper 128bits of AVX registers.
;; Expander that builds the PARALLEL by hand: element 0 is the
;; UNSPECV_VZEROUPPER marker and elements 1..nregs are CLOBBERs of each
;; SSE register viewed in V8SImode.  nregs is 16 when TARGET_64BIT and 8
;; otherwise.
11462 (define_expand "avx_vzeroupper"
11463 [(match_par_dup 0 [(const_int 0)])]
11466 int nregs = TARGET_64BIT ? 16 : 8;
11469 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11471 XVECEXP (operands[0], 0, 0)
11472 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11473 UNSPECV_VZEROUPPER);
11475 for (regno = 0; regno < nregs; regno++)
11476 XVECEXP (operands[0], 0, regno + 1)
11477 = gen_rtx_CLOBBER (VOIDmode,
11478 gen_rtx_REG (V8SImode, SSE_REGNO (regno)));
;; Matcher for the vzeroupper PARALLEL: the whole parallel must satisfy
;; vzeroupper_operation and start with the UNSPECV_VZEROUPPER marker.
;; The attributes mark it as having no ModRM byte and no memory access.
11481 (define_insn "*avx_vzeroupper"
11482 [(match_parallel 0 "vzeroupper_operation"
11483 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
11486 [(set_attr "type" "sse")
11487 (set_attr "modrm" "0")
11488 (set_attr "memory" "none")
11489 (set_attr "prefix" "vex")
11490 (set_attr "mode" "OI")])
;; Broadcast a scalar into every element of an AVX256MODE24P vector.
;; A memory source (alternative 0) is emitted as
;; vbroadcast<ssescalarmodesuffix>.  A register source is split after
;; reload into two steps: a vec_duplicate into the half-width vector,
;; then a vec_concat of that half with itself.  Operand 2 of the split is
;; the destination's own hard register viewed in <avxhalfvecmode>.
11492 (define_insn_and_split "vec_dup<mode>"
11493 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
11494 (vec_duplicate:AVX256MODE24P
11495 (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
11498 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11500 "&& reload_completed && REG_P (operands[1])"
11501 [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
11502 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
11503 "operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));"
11504 [(set_attr "type" "ssemov")
11505 (set_attr "prefix_extra" "1")
11506 (set_attr "prefix" "vex")
11507 (set_attr "mode" "V8SF")])
;; Duplicate a 128-bit half (operand 1) into both halves of a 256-bit
;; register.  Three alternatives, chosen by where operand 1 lives:
;;   0: memory                    -> vbroadcastf128
;;   1: same register as operand 0 -> vinsertf128 with immediate 1
;;   2: any other register         -> vperm2f128 with immediate 0
11509 (define_insn "avx_vbroadcastf128_<mode>"
11510 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
11511 (vec_concat:AVX256MODE
11512 (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11516 vbroadcastf128\t{%1, %0|%0, %1}
11517 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
11518 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11519 [(set_attr "type" "ssemov,sselog1,sselog1")
11520 (set_attr "prefix_extra" "1")
11521 (set_attr "length_immediate" "0,1,1")
11522 (set_attr "prefix" "vex")
11523 (set_attr "mode" "V4SF,V8SF,V8SF")])
11525 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11526 ;; If it so happens that the input is in memory, use vbroadcast.
11527 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF variant.  "elt" is the selected element index taken from operand 3.
;; Memory path: the address is advanced by elt * 4 bytes and re-typed as
;; SFmode, then vbroadcastss loads that scalar.
;; Register path: vpermilps is used with immediate elt * 0x55, which
;; repeats the 2-bit index in all four selector fields.
11528 (define_insn "*avx_vperm_broadcast_v4sf"
11529 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11531 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11532 (match_parallel 2 "avx_vbroadcast_operand"
11533 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11536 int elt = INTVAL (operands[3]);
11537 switch (which_alternative)
11541 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11542 return "vbroadcastss\t{%1, %0|%0, %1}";
11544 operands[2] = GEN_INT (elt * 0x55);
11545 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11547 gcc_unreachable ();
11550 [(set_attr "type" "ssemov,ssemov,sselog1")
11551 (set_attr "prefix_extra" "1")
11552 (set_attr "length_immediate" "0,0,1")
11553 (set_attr "prefix" "vex")
11554 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit variant of the vec_select broadcast; always split after reload.
;; "elt" is the selected element index taken from operand 3.
;; One path emits two shuffles: gen_avx_vpermil<mode> replicates the
;; wanted element within each 128-bit lane, then gen_avx_vperm2f128<mode>3
;; copies the lane holding it into both lanes of the destination.
;; The other path rewrites operand 1 to address the scalar element
;; (offset elt * element size) so the vec_duplicate replacement pattern
;; can be used.
;; NOTE(review): the test that chooses between the two paths is not
;; visible here; presumably register vs. memory operand 1 -- confirm.
11556 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11557 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
11558 (vec_select:AVX256MODEF2P
11559 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
11560 (match_parallel 2 "avx_vbroadcast_operand"
11561 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11564 "&& reload_completed"
11565 [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
11567 rtx op0 = operands[0], op1 = operands[1];
11568 int elt = INTVAL (operands[3]);
11574 /* Shuffle element we care about into all elements of the 128-bit lane.
11575 The other lane gets shuffled too, but we don't care. */
11576 if (<MODE>mode == V4DFmode)
11577 mask = (elt & 1 ? 15 : 0);
11579 mask = (elt & 3) * 0x55;
11580 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11582 /* Shuffle the lane we care about into both lanes of the dest. */
11583 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11584 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11588 operands[1] = adjust_address_nv (op1, <avxscalarmode>mode,
11589 elt * GET_MODE_SIZE (<avxscalarmode>mode));
;; Expander for vpermil with an immediate, AVXMODEFDP modes.
;; The immediate (operand 2) is decoded into explicit vec_select indices:
;; one mask bit per element.  perm[0] and perm[1] take mask bits 0 and 1.
;; For V4DFmode, perm[2] and perm[3] take bits 2 and 3 with 2 added, so
;; they index within the upper pair of elements.
;; The resulting PARALLEL of <ssescalarnum> indices is built at the end.
11592 (define_expand "avx_vpermil<mode>"
11593 [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
11594 (vec_select:AVXMODEFDP
11595 (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
11596 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11599 int mask = INTVAL (operands[2]);
11600 rtx perm[<ssescalarnum>];
11602 perm[0] = GEN_INT (mask & 1);
11603 perm[1] = GEN_INT ((mask >> 1) & 1);
11604 if (<MODE>mode == V4DFmode)
11606 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11607 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11611 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for vpermil with an immediate, AVXMODEFSP modes.
;; The immediate (operand 2) is decoded into explicit vec_select indices:
;; two mask bits per element for perm[0..3].  For V8SFmode, perm[4..7]
;; reuse the same four 2-bit fields with 4 added, so the upper four
;; elements are permuted the same way within their own group.
;; The resulting PARALLEL of <ssescalarnum> indices is built at the end.
11614 (define_expand "avx_vpermil<mode>"
11615 [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
11616 (vec_select:AVXMODEFSP
11617 (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
11618 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11621 int mask = INTVAL (operands[2]);
11622 rtx perm[<ssescalarnum>];
11624 perm[0] = GEN_INT (mask & 3);
11625 perm[1] = GEN_INT ((mask >> 2) & 3);
11626 perm[2] = GEN_INT ((mask >> 4) & 3);
11627 perm[3] = GEN_INT ((mask >> 6) & 3);
11628 if (<MODE>mode == V8SFmode)
11630 perm[4] = GEN_INT ((mask & 3) + 4);
11631 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11632 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11633 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11637 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matcher for the vec_select form of vpermil with constant indices.
;; The index PARALLEL (operand 2) must satisfy
;; avx_vpermilp_<mode>_operand.  At output time the immediate is
;; recomputed as avx_vpermilp_parallel (...) - 1 and replaces operand 2
;; before the vpermil<ssemodesuffix> template is printed.
11640 (define_insn "*avx_vpermilp<mode>"
11641 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11642 (vec_select:AVXMODEF2P
11643 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
11644 (match_parallel 2 "avx_vpermilp_<mode>_operand"
11645 [(match_operand 3 "const_int_operand" "")])))]
11648 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11649 operands[2] = GEN_INT (mask);
11650 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11652 [(set_attr "type" "sselog")
11653 (set_attr "prefix_extra" "1")
11654 (set_attr "length_immediate" "1")
11655 (set_attr "prefix" "vex")
11656 (set_attr "mode" "<MODE>")])
;; Variable-control vpermil: operand 1 is the data register and operand 2
;; a control vector in <avxpermvecmode> (register or memory).  No
;; immediate is involved, so there is no length_immediate attribute.
11658 (define_insn "avx_vpermilvar<mode>3"
11659 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11661 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11662 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11665 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11666 [(set_attr "type" "sselog")
11667 (set_attr "prefix_extra" "1")
11668 (set_attr "prefix" "vex")
11669 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128 with an immediate (operand 3).
;; The default expansion is the UNSPEC_VPERMIL2F128 form in the template.
;; When neither bit 3 nor bit 7 of the mask is set ((mask & 0x88) == 0),
;; a SET is built instead: a vec_select over the vec_concat of operands 1
;; and 2.  Mask bits 1:0 pick the half-vector (nelt2 consecutive elements
;; starting at base) for the low half of the result, and bits 5:4 pick
;; the one for the high half.
11671 (define_expand "avx_vperm2f128<mode>3"
11672 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
11673 (unspec:AVX256MODE2P
11674 [(match_operand:AVX256MODE2P 1 "register_operand" "")
11675 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
11676 (match_operand:SI 3 "const_0_to_255_operand" "")]
11677 UNSPEC_VPERMIL2F128))]
11680 int mask = INTVAL (operands[3]);
11681 if ((mask & 0x88) == 0)
11683 rtx perm[<ssescalarnum>], t1, t2;
11684 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
11686 base = (mask & 3) * nelt2;
11687 for (i = 0; i < nelt2; ++i)
11688 perm[i] = GEN_INT (base + i);
11690 base = ((mask >> 4) & 3) * nelt2;
11691 for (i = 0; i < nelt2; ++i)
11692 perm[i + nelt2] = GEN_INT (base + i);
11694 t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
11695 operands[1], operands[2]);
11696 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11697 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11698 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
11704 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11705 ;; means that in order to represent this properly in rtl we'd have to
11706 ;; nest *another* vec_concat with a zero operand and do the select from
11707 ;; a 4x wide vector. That doesn't seem very nice.
;; Hence this "full" pattern keeps the operation as an opaque unspec
;; (UNSPEC_VPERMIL2F128) and passes the 8-bit immediate through unchanged.
11708 (define_insn "*avx_vperm2f128<mode>_full"
11709 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11710 (unspec:AVX256MODE2P
11711 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11712 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11713 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11714 UNSPEC_VPERMIL2F128))]
11716 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11717 [(set_attr "type" "sselog")
11718 (set_attr "prefix_extra" "1")
11719 (set_attr "length_immediate" "1")
11720 (set_attr "prefix" "vex")
11721 (set_attr "mode" "V8SF")])
;; vperm2f128 expressed as a vec_select over the vec_concat of operands 1
;; and 2.  The index PARALLEL (operand 3) must satisfy
;; avx_vperm2f128_<mode>_operand.  At output time the immediate is
;; recomputed as avx_vperm2f128_parallel (...) - 1 and replaces operand 3
;; before the template is printed.
11723 (define_insn "*avx_vperm2f128<mode>_nozero"
11724 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11725 (vec_select:AVX256MODE2P
11726 (vec_concat:<ssedoublesizemode>
11727 (match_operand:AVX256MODE2P 1 "register_operand" "x")
11728 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
11729 (match_parallel 3 "avx_vperm2f128_<mode>_operand"
11730 [(match_operand 4 "const_int_operand" "")])))]
11733 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
11734 operands[3] = GEN_INT (mask);
11735 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11737 [(set_attr "type" "sselog")
11738 (set_attr "prefix_extra" "1")
11739 (set_attr "length_immediate" "1")
11740 (set_attr "prefix" "vex")
11741 (set_attr "mode" "V8SF")])
;; Expander for inserting a 128-bit half (operand 2) into a 256-bit
;; vector (operand 1).  Operand 3 is a constant 0 or 1 and selects, via
;; the switch, between vec_set_lo_<mode> and vec_set_hi_<mode>; any other
;; value is unreachable.
;; NOTE(review): the case labels are not visible here; presumably 0 maps
;; to the low half and 1 to the high half -- confirm.
11743 (define_expand "avx_vinsertf128<mode>"
11744 [(match_operand:AVX256MODE 0 "register_operand" "")
11745 (match_operand:AVX256MODE 1 "register_operand" "")
11746 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
11747 (match_operand:SI 3 "const_0_to_1_operand" "")]
11750 switch (INTVAL (operands[3]))
11753 emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
11757 emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
11761 gcc_unreachable ();
;; Replace the low half of a 4-element 256-bit vector (AVX256MODE4P).
;; The result is operand 2 concatenated with elements 2 and 3 (the high
;; half) of operand 1; emitted as vinsertf128 with immediate 0.
11766 (define_insn "vec_set_lo_<mode>"
11767 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11768 (vec_concat:AVX256MODE4P
11769 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11770 (vec_select:<avxhalfvecmode>
11771 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11772 (parallel [(const_int 2) (const_int 3)]))))]
11774 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11775 [(set_attr "type" "sselog")
11776 (set_attr "prefix_extra" "1")
11777 (set_attr "length_immediate" "1")
11778 (set_attr "prefix" "vex")
11779 (set_attr "mode" "V8SF")])
;; Replace the high half of a 4-element 256-bit vector (AVX256MODE4P).
;; The result is elements 0 and 1 (the low half) of operand 1
;; concatenated with operand 2; emitted as vinsertf128 with immediate 1.
11781 (define_insn "vec_set_hi_<mode>"
11782 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11783 (vec_concat:AVX256MODE4P
11784 (vec_select:<avxhalfvecmode>
11785 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11786 (parallel [(const_int 0) (const_int 1)]))
11787 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11789 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11790 [(set_attr "type" "sselog")
11791 (set_attr "prefix_extra" "1")
11792 (set_attr "length_immediate" "1")
11793 (set_attr "prefix" "vex")
11794 (set_attr "mode" "V8SF")])
;; Replace the low half of an 8-element 256-bit vector (AVX256MODE8P).
;; The result is operand 2 concatenated with elements 4..7 (the high
;; half) of operand 1; emitted as vinsertf128 with immediate 0.
11796 (define_insn "vec_set_lo_<mode>"
11797 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11798 (vec_concat:AVX256MODE8P
11799 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
11800 (vec_select:<avxhalfvecmode>
11801 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11802 (parallel [(const_int 4) (const_int 5)
11803 (const_int 6) (const_int 7)]))))]
11805 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11806 [(set_attr "type" "sselog")
11807 (set_attr "prefix_extra" "1")
11808 (set_attr "length_immediate" "1")
11809 (set_attr "prefix" "vex")
11810 (set_attr "mode" "V8SF")])
;; Replace the high half of an 8-element 256-bit vector (AVX256MODE8P).
;; The result is elements 0..3 (the low half) of operand 1 concatenated
;; with operand 2; emitted as vinsertf128 with immediate 1.
11812 (define_insn "vec_set_hi_<mode>"
11813 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11814 (vec_concat:AVX256MODE8P
11815 (vec_select:<avxhalfvecmode>
11816 (match_operand:AVX256MODE8P 1 "register_operand" "x")
11817 (parallel [(const_int 0) (const_int 1)
11818 (const_int 2) (const_int 3)]))
11819 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11821 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11822 [(set_attr "type" "sselog")
11823 (set_attr "prefix_extra" "1")
11824 (set_attr "length_immediate" "1")
11825 (set_attr "prefix" "vex")
11826 (set_attr "mode" "V8SF")])
;; Replace the low half of a V16HI vector: V8HI operand 2 is combined
;; with elements 8..15 (the high half) of operand 1.  Emitted as
;; vinsertf128 with immediate 0.
11828 (define_insn "vec_set_lo_v16hi"
11829 [(set (match_operand:V16HI 0 "register_operand" "=x")
11831 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
11833 (match_operand:V16HI 1 "register_operand" "x")
11834 (parallel [(const_int 8) (const_int 9)
11835 (const_int 10) (const_int 11)
11836 (const_int 12) (const_int 13)
11837 (const_int 14) (const_int 15)]))))]
11839 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11840 [(set_attr "type" "sselog")
11841 (set_attr "prefix_extra" "1")
11842 (set_attr "length_immediate" "1")
11843 (set_attr "prefix" "vex")
11844 (set_attr "mode" "V8SF")])
;; Replace the high half of a V16HI vector: elements 0..7 (the low half)
;; of operand 1 are combined with V8HI operand 2.  Emitted as vinsertf128
;; with immediate 1.
11846 (define_insn "vec_set_hi_v16hi"
11847 [(set (match_operand:V16HI 0 "register_operand" "=x")
11850 (match_operand:V16HI 1 "register_operand" "x")
11851 (parallel [(const_int 0) (const_int 1)
11852 (const_int 2) (const_int 3)
11853 (const_int 4) (const_int 5)
11854 (const_int 6) (const_int 7)]))
11855 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11857 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11858 [(set_attr "type" "sselog")
11859 (set_attr "prefix_extra" "1")
11860 (set_attr "length_immediate" "1")
11861 (set_attr "prefix" "vex")
11862 (set_attr "mode" "V8SF")])
;; Replace the low half of a V32QI vector: V16QI operand 2 is combined
;; with elements 16..31 (the high half) of operand 1.  Emitted as
;; vinsertf128 with immediate 0.
11864 (define_insn "vec_set_lo_v32qi"
11865 [(set (match_operand:V32QI 0 "register_operand" "=x")
11867 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
11869 (match_operand:V32QI 1 "register_operand" "x")
11870 (parallel [(const_int 16) (const_int 17)
11871 (const_int 18) (const_int 19)
11872 (const_int 20) (const_int 21)
11873 (const_int 22) (const_int 23)
11874 (const_int 24) (const_int 25)
11875 (const_int 26) (const_int 27)
11876 (const_int 28) (const_int 29)
11877 (const_int 30) (const_int 31)]))))]
11879 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11880 [(set_attr "type" "sselog")
11881 (set_attr "prefix_extra" "1")
11882 (set_attr "length_immediate" "1")
11883 (set_attr "prefix" "vex")
11884 (set_attr "mode" "V8SF")])
;; Replace the high half of a V32QI vector: elements 0..15 (the low half)
;; of operand 1 are combined with V16QI operand 2.  Emitted as vinsertf128
;; with immediate 1.
11886 (define_insn "vec_set_hi_v32qi"
11887 [(set (match_operand:V32QI 0 "register_operand" "=x")
11890 (match_operand:V32QI 1 "register_operand" "x")
11891 (parallel [(const_int 0) (const_int 1)
11892 (const_int 2) (const_int 3)
11893 (const_int 4) (const_int 5)
11894 (const_int 6) (const_int 7)
11895 (const_int 8) (const_int 9)
11896 (const_int 10) (const_int 11)
11897 (const_int 12) (const_int 13)
11898 (const_int 14) (const_int 15)]))
11899 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
11901 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11902 [(set_attr "type" "sselog")
11903 (set_attr "prefix_extra" "1")
11904 (set_attr "length_immediate" "1")
11905 (set_attr "prefix" "vex")
11906 (set_attr "mode" "V8SF")])
;; Masked load, emitted as vmaskmov<ssemodesuffix>.  Operand 1 is the
;; memory source, operand 2 the mask register and operand 0 the
;; destination register.
11908 (define_insn "avx_maskload<ssemodesuffix><avxmodesuffix>"
11909 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11911 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
11912 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11916 "vmaskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
11917 [(set_attr "type" "sselog1")
11918 (set_attr "prefix_extra" "1")
11919 (set_attr "prefix" "vex")
11920 (set_attr "mode" "<MODE>")])
;; Masked store (UNSPEC_MASKSTORE), emitted as vmaskmov<ssemodesuffix>.
;; Operand 0 is the memory destination; operands 1 and 2 are both
;; registers, and in the AT&T half of the template operand 2 is printed
;; first, then operand 1, then the destination.
11922 (define_insn "avx_maskstore<ssemodesuffix><avxmodesuffix>"
11923 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
11925 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11926 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11928 UNSPEC_MASKSTORE))]
11930 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11931 [(set_attr "type" "sselog1")
11932 (set_attr "prefix_extra" "1")
11933 (set_attr "prefix" "vex")
11934 (set_attr "mode" "<MODE>")])
;; Cast from a half-width vector (operand 1) to the full 256-bit mode,
;; kept as an unspec until reload.  After reload it is split into an
;; ordinary move: operand 1 is re-expressed in <MODE>mode, either as the
;; same hard register number in the wider mode or through gen_lowpart,
;; and moved into operand 0.
;; NOTE(review): the test choosing between the two re-typing paths is not
;; visible here; presumably register vs. non-register operand 1 -- confirm.
11936 (define_insn_and_split "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
11937 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
11938 (unspec:AVX256MODE2P
11939 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "xm,x")]
11943 "&& reload_completed"
11946 rtx op1 = operands[1];
11948 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
11950 op1 = gen_lowpart (<MODE>mode, op1);
11951 emit_move_insn (operands[0], op1);
;; Vector initialization for the 256-bit AVX modes.  All work is delegated
;; to ix86_expand_vector_init, called with false as its first argument,
;; operand 0 as the target and operand 1 as the initializer.
11955 (define_expand "vec_init<mode>"
11956 [(match_operand:AVX256MODE 0 "register_operand" "")
11957 (match_operand 1 "" "")]
11960 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Concatenate two half-width vectors into a 256-bit register.
;; Alternative 0 (operand 2 in a register or memory): vinsertf128 with
;; immediate 1, using operand 1 printed through %t1 as the base.
;; Alternative 1 (operand 2 matches constraint "C"): only operand 1 is
;; moved, into the %x0 view of the destination, using vmovaps, vmovapd
;; or vmovdqa according to the insn's "mode" attribute.
11964 (define_insn "*vec_concat<mode>_avx"
11965 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
11966 (vec_concat:AVX256MODE
11967 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
11968 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
11971 switch (which_alternative)
11974 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
11976 switch (get_attr_mode (insn))
11979 return "vmovaps\t{%1, %x0|%x0, %1}";
11981 return "vmovapd\t{%1, %x0|%x0, %1}";
11983 return "vmovdqa\t{%1, %x0|%x0, %1}";
11986 gcc_unreachable ();
11989 [(set_attr "type" "sselog,ssemov")
11990 (set_attr "prefix_extra" "1,*")
11991 (set_attr "length_immediate" "1,*")
11992 (set_attr "prefix" "vex")
11993 (set_attr "mode" "<avxvecmode>")])
;; Half-precision to single-precision conversion, register source,
;; emitted as vcvtph2ps.  The V8HI input is modelled as a V8SF unspec
;; result from which the V4SF destination is taken through an index
;; PARALLEL.  The indices must be 0, 1, 2, 3 so that the RTL describes
;; the four low elements in order; the former "0 1 1 2" list duplicated
;; element 1 and dropped element 3.
11995 (define_insn "vcvtph2ps"
11996 [(set (match_operand:V4SF 0 "register_operand" "=x")
11998 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12000 (parallel [(const_int 0) (const_int 1)
12001 (const_int 2) (const_int 3)])))]
12003 "vcvtph2ps\t{%1, %0|%0, %1}"
12004 [(set_attr "type" "ssecvt")
12005 (set_attr "prefix" "vex")
12006 (set_attr "mode" "V4SF")])
;; Memory form of the 128-bit half -> single conversion: four halves are
;; loaded from a V4HI memory operand and converted to a V4SF register.
;; NOTE(review): the "mode" attribute is V8SF although the result is
;; V4SF (the register form of this conversion uses V4SF) -- confirm
;; whether this is intentional before changing it.
(define_insn "*vcvtph2ps_load"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
                     UNSPEC_VCVTPH2PS))]
  "TARGET_F16C"
  "vcvtph2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
;; 256-bit half -> single conversion: eight halves (V8HI, register or
;; memory) are converted to eight single-precision floats (V8SF).
(define_insn "vcvtph2ps256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
                     UNSPEC_VCVTPH2PS))]
  "TARGET_F16C"
  "vcvtph2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
;; Expander for the 128-bit single -> half conversion with a register
;; destination.  Operand 1 holds four floats, operand 2 is the immediate
;; control operand.  The four converted halves form the low part of the
;; V8HI result; the high part is a zero vector supplied as operand 3 by
;; the preparation statement.
;; NOTE(review): immediate_operand accepts any constant, while the
;; matching insn constrains operand 2 with "N"; a predicate limited to
;; the same range would reject bad immediates earlier -- confirm.
(define_expand "vcvtps2ph"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (vec_concat:V8HI
          (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
                        (match_operand:SI 2 "immediate_operand" "")]
                       UNSPEC_VCVTPS2PH)
          (match_dup 3)))]
  "TARGET_F16C"
  "operands[3] = CONST0_RTX (V4HImode);")
;; Register-destination form of the 128-bit single -> half conversion.
;; The low four halves come from converting operand 1 under the
;; immediate control in operand 2; the high four halves are required to
;; be the zero vector (operand 3, const0_operand).
(define_insn "*vcvtps2ph"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
                        (match_operand:SI 2 "immediate_operand" "N")]
                       UNSPEC_VCVTPS2PH)
          (match_operand:V4HI 3 "const0_operand" "")))]
  "TARGET_F16C"
  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])
;; Memory-destination form of the 128-bit single -> half conversion:
;; the four converted halves (V4HI) are stored directly to memory, so
;; no zero upper half is part of the pattern.
(define_insn "*vcvtps2ph_store"
  [(set (match_operand:V4HI 0 "memory_operand" "=m")
        (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
                      (match_operand:SI 2 "immediate_operand" "N")]
                     UNSPEC_VCVTPS2PH))]
  "TARGET_F16C"
  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])
;; 256-bit single -> half conversion: eight floats (V8SF register) are
;; converted under the immediate control in operand 2 to eight halves
;; (V8HI), written to a register or to memory.
;; NOTE(review): as with the 128-bit patterns, the predicate on operand 2
;; (immediate_operand) is wider than its "N" constraint -- confirm
;; whether a range-limited predicate should be used instead.
(define_insn "vcvtps2ph256"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
        (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
                      (match_operand:SI 2 "immediate_operand" "N")]
                     UNSPEC_VCVTPS2PH))]
  "TARGET_F16C"
  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])