1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; All vector modes including V1TImode, used in move patterns.
;; Mode iterator V16: every 256-bit mode is gated on TARGET_AVX; the 128-bit
;; modes carry no extra condition.
;; NOTE(review): the comment above mentions V1TImode, but no V1TI entry is
;; visible in the list below -- confirm it was not lost from this listing.
22 (define_mode_iterator V16
23 [(V32QI "TARGET_AVX") V16QI
24 (V16HI "TARGET_AVX") V8HI
25 (V8SI "TARGET_AVX") V4SI
26 (V4DI "TARGET_AVX") V2DI
28 (V8SF "TARGET_AVX") V4SF
29 (V4DF "TARGET_AVX") V2DF])
;; Mode iterator V: integer and float vector modes.  The 256-bit modes
;; require TARGET_AVX and V2DF requires TARGET_SSE2; the remaining 128-bit
;; modes are unconditional.
32 (define_mode_iterator V
33 [(V32QI "TARGET_AVX") V16QI
34 (V16HI "TARGET_AVX") V8HI
35 (V8SI "TARGET_AVX") V4SI
36 (V4DI "TARGET_AVX") V2DI
37 (V8SF "TARGET_AVX") V4SF
38 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
40 ;; All 128bit vector modes
;; Only V2DF carries a condition (TARGET_SSE2); the other entries are
;; unconditional.
41 (define_mode_iterator V_128
42 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
44 ;; All 256bit vector modes
;; No per-mode conditions are attached here, so any target gating has to
;; come from the patterns that use this iterator.
45 (define_mode_iterator V_256
46 [V32QI V16HI V8SI V4DI V8SF V4DF])
48 ;; All vector float modes
;; 256-bit float modes need TARGET_AVX, V2DF needs TARGET_SSE2, and V4SF is
;; unconditional.
49 (define_mode_iterator VF
50 [(V8SF "TARGET_AVX") V4SF
51 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
53 ;; All SFmode vector float modes
;; Single-precision vectors: V8SF (TARGET_AVX only) and V4SF.
54 (define_mode_iterator VF1
55 [(V8SF "TARGET_AVX") V4SF])
57 ;; All DFmode vector float modes
;; Double-precision vectors: V4DF (TARGET_AVX only) and V2DF.  Unlike the
;; VF iterator, V2DF has no TARGET_SSE2 condition attached here.
58 (define_mode_iterator VF2
59 [(V4DF "TARGET_AVX") V2DF])
61 ;; All 128bit vector float modes
;; V4SF is unconditional; V2DF requires TARGET_SSE2.
62 (define_mode_iterator VF_128
63 [V4SF (V2DF "TARGET_SSE2")])
65 ;; All 256bit vector float modes
;; NOTE(review): no mode list or closing parenthesis for VF_256 is visible in
;; this listing -- the definition appears truncated; confirm against the
;; complete file.
66 (define_mode_iterator VF_256
69 ;; All vector integer modes
;; Each 256-bit integer mode is gated on TARGET_AVX and is listed next to
;; its unconditional 128-bit counterpart.
70 (define_mode_iterator VI
71 [(V32QI "TARGET_AVX") V16QI
72 (V16HI "TARGET_AVX") V8HI
73 (V8SI "TARGET_AVX") V4SI
74 (V4DI "TARGET_AVX") V2DI])
76 ;; All QImode vector integer modes
;; Byte-element vectors: V32QI (TARGET_AVX only) and V16QI.
77 (define_mode_iterator VI1
78 [(V32QI "TARGET_AVX") V16QI])
80 ;; All DImode vector integer modes
;; 64-bit-element vectors: V4DI (TARGET_AVX only) and V2DI.
81 (define_mode_iterator VI8
82 [(V4DI "TARGET_AVX") V2DI])
84 ;; All 128bit vector integer modes
85 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
87 ;; Random 128bit vector integer mode combinations
;; The digits in each iterator name give the element sizes, in bytes, of the
;; modes it contains (1 = QI, 2 = HI, 4 = SI, 8 = DI); e.g. VI248_128 is
;; V8HI, V4SI and V2DI.
88 (define_mode_iterator VI12_128 [V16QI V8HI])
89 (define_mode_iterator VI14_128 [V16QI V4SI])
90 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
91 (define_mode_iterator VI24_128 [V8HI V4SI])
92 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
94 ;; Int-float size matches
;; Each iterator pairs an integer vector mode with the float vector mode of
;; the same element size and total width: 4-byte elements (SI/SF) or 8-byte
;; elements (DI/DF), in 128-bit and 256-bit variants.
95 (define_mode_iterator VI4F_128 [V4SI V4SF])
96 (define_mode_iterator VI8F_128 [V2DI V2DF])
97 (define_mode_iterator VI4F_256 [V8SI V8SF])
98 (define_mode_iterator VI8F_256 [V4DI V4DF])
100 ;; Mapping from float mode to required SSE level
;; The resulting string ("sse", "sse2" or "avx") is substituted for <sse> in
;; pattern names such as "<sse>_movu<ssemodesuffix><avxsizesuffix>".
101 (define_mode_attr sse
102 [(SF "sse") (DF "sse2")
103 (V4SF "sse") (V2DF "sse2")
104 (V8SF "avx") (V4DF "avx")])
;; Name prefix for patterns whose 128-bit form is "sse2": the 256-bit modes
;; map to "avx".  Only QI and DI element vectors are covered.
106 (define_mode_attr sse2
107 [(V16QI "sse2") (V32QI "avx")
108 (V2DI "sse2") (V4DI "avx")])
;; Name prefix for patterns whose 128-bit form is "sse3" (byte vectors only).
110 (define_mode_attr sse3
111 [(V16QI "sse3") (V32QI "avx")])
;; Name prefix for float-vector patterns whose 128-bit form is "sse4_1".
113 (define_mode_attr sse4_1
114 [(V4SF "sse4_1") (V2DF "sse4_1")
115 (V8SF "avx") (V4DF "avx")])
;; Pattern-name suffix selecting the vector width: "256" for every 256-bit
;; mode and the empty string for every 128-bit mode.
117 (define_mode_attr avxsizesuffix
118 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
119 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
120 (V8SF "256") (V4DF "256")
121 (V4SF "") (V2DF "")])
123 ;; SSE instruction mode
;; Value used for the "mode" insn attribute: integer vectors collapse to OI
;; (256-bit) or TI (128-bit, including V1TI); float vectors map to themselves.
124 (define_mode_attr sseinsnmode
125 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
126 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
127 (V8SF "V8SF") (V4DF "V4DF")
128 (V4SF "V4SF") (V2DF "V2DF")])
130 ;; Mapping of vector float modes to an integer mode of the same size
;; The integer mode has the same element count and element width as the
;; float mode (e.g. V8SF -> V8SI, V2DF -> V2DI).
131 (define_mode_attr sseintvecmode
132 [(V8SF "V8SI") (V4DF "V4DI")
133 (V4SF "V4SI") (V2DF "V2DI")])
135 ;; Mapping of vector modes to a vector mode of double size
;; The element type is kept and the element count is doubled, so the
;; 256-bit inputs map to 512-bit mode names (e.g. V8SF -> V16SF).
136 (define_mode_attr ssedoublevecmode
137 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
138 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
139 (V8SF "V16SF") (V4DF "V8DF")
140 (V4SF "V8SF") (V2DF "V4DF")])
142 ;; Mapping of vector modes to a vector mode of half size
;; The element type is kept and the element count is halved.
;; NOTE(review): no closing "])" is visible for this attribute, and no
;; entries for V2DI, V4SF or V2DF appear -- the tail of the list looks
;; truncated in this listing; confirm against the complete file.
143 (define_mode_attr ssehalfvecmode
144 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
145 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
146 (V8SF "V4SF") (V4DF "V2DF")
149 ;; Mapping of vector modes back to the scalar modes
;; Yields the element mode of the vector (e.g. V8SF -> SF); the scalar
;; "vm" patterns use it for their "mode" attribute.
150 (define_mode_attr ssescalarmode
151 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
152 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
153 (V8SF "SF") (V4DF "DF")
154 (V4SF "SF") (V2DF "DF")])
156 ;; Number of scalar elements in each vector type
;; The value is the element count as a decimal string; it matches the
;; number embedded in the mode name (V32QI -> "32", V2DF -> "2").
157 (define_mode_attr ssescalarnum
158 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
159 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
160 (V8SF "8") (V4DF "4")
161 (V4SF "4") (V2DF "2")])
163 ;; SSE scalar suffix for vector modes
;; "ss" for 4-byte elements and "sd" for 8-byte elements; it is appended to
;; scalar mnemonics such as mul<ssescalarmodesuffix>.
;; NOTE(review): no closing "])" is visible for this attribute -- the tail
;; of the list looks truncated in this listing; confirm against the file.
164 (define_mode_attr ssescalarmodesuffix
165 [(V8SF "ss") (V4DF "sd")
166 (V4SF "ss") (V2DF "sd")
167 (V8SI "ss") (V4DI "sd")
170 ;; Pack/unpack vector modes
;; sseunpackmode: 128-bit integer mode with half as many elements, each
;; twice as wide (V16QI -> V8HI, ...).
171 (define_mode_attr sseunpackmode
172 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")])
;; ssepackmode: the inverse mapping -- twice as many elements, each half as
;; wide (V8HI -> V16QI, ...).
174 (define_mode_attr ssepackmode
175 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")])
177 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one (7, 15, 31, 63).
178 (define_mode_attr sserotatemax
179 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
181 ;; Mapping of immediate bits for blend instructions
;; Each value is an all-ones mask with one bit per vector element:
;; 8 elements -> 255, 4 elements -> 15, 2 elements -> 3.
182 (define_mode_attr blendbits
183 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
185 ;; Instruction suffix for sign and zero extensions.
;; Code attribute: sign_extend yields "sx" and zero_extend yields "zx".
186 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
;; 256-bit mode subsets.  AVX256MODE2P lists V8SI, V8SF and V4DF;
;; AVX256MODE24P additionally includes V4DI.
191 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
192 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
;; FMAMODE: scalar SF/DF plus the 128-bit and 256-bit float vector modes.
;; No per-mode target conditions are attached here.
194 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
196 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
198 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
202 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
204 ;; All of these patterns are enabled for SSE1 as well as SSE2.
205 ;; This is essential for maintaining stable calling conventions.
;; Standard move expander for every mode in V16.  Both operands accept
;; registers or memory; expansion is delegated to ix86_expand_vector_move.
;; NOTE(review): the enable condition and the braces around the C body are
;; not visible in this listing.
207 (define_expand "mov<mode>"
208 [(set (match_operand:V16 0 "nonimmediate_operand" "")
209 (match_operand:V16 1 "nonimmediate_operand" ""))]
212 ix86_expand_vector_move (<MODE>mode, operands);
;; Vector move insn for the V16 modes.  Three alternatives:
;;   0: SSE constant ("C") into an xmm register (type sselog1),
;;   1: register or memory into a register     (type ssemov),
;;   2: register into memory                   (type ssemov).
;; The visible part of the condition requires at least one operand to be a
;; register.  The constant case takes its template from
;; standard_sse_constant_opcode.  The other cases switch on the insn's
;; "mode" attribute and choose among movaps/movapd/movdqa; the unaligned
;; vmovups/vmovupd/vmovdqu forms are returned when (among conditions not
;; fully visible here) either operand is misaligned, and movaps is
;; substituted when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.
;; The "mode" attribute is <sseinsnmode> under TARGET_AVX; otherwise the
;; cond falls through V4SF / V2DF selections to a default of TI.
;; NOTE(review): case labels, braces and the leading "TARGET_SSE"-style
;; condition lines are not visible in this listing.
216 (define_insn "*mov<mode>_internal"
217 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
218 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
220 && (register_operand (operands[0], <MODE>mode)
221 || register_operand (operands[1], <MODE>mode))"
223 switch (which_alternative)
226 return standard_sse_constant_opcode (insn, operands[1]);
229 switch (get_attr_mode (insn))
234 && (misaligned_operand (operands[0], <MODE>mode)
235 || misaligned_operand (operands[1], <MODE>mode)))
236 return "vmovups\t{%1, %0|%0, %1}";
238 return "%vmovaps\t{%1, %0|%0, %1}";
243 && (misaligned_operand (operands[0], <MODE>mode)
244 || misaligned_operand (operands[1], <MODE>mode)))
245 return "vmovupd\t{%1, %0|%0, %1}";
246 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
247 return "%vmovaps\t{%1, %0|%0, %1}";
249 return "%vmovapd\t{%1, %0|%0, %1}";
254 && (misaligned_operand (operands[0], <MODE>mode)
255 || misaligned_operand (operands[1], <MODE>mode)))
256 return "vmovdqu\t{%1, %0|%0, %1}";
257 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
258 return "%vmovaps\t{%1, %0|%0, %1}";
260 return "%vmovdqa\t{%1, %0|%0, %1}";
269 [(set_attr "type" "sselog1,ssemov,ssemov")
270 (set_attr "prefix" "maybe_vex")
272 (cond [(ne (symbol_ref "TARGET_AVX") (const_int 0))
273 (const_string "<sseinsnmode>")
275 (ne (symbol_ref "optimize_function_for_size_p (cfun)")
277 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
278 (and (eq_attr "alternative" "2")
279 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
281 (const_string "V4SF")
282 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
283 (const_string "V4SF")
284 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
285 (const_string "V2DF")
287 (const_string "TI")))])
;; Emits (v)movq from a V2DI register or memory operand into an xmm
;; register.  The visible RTL selects element 0 of operand 1.
;; NOTE(review): the enclosing vec_concat/vec_select lines and the enable
;; condition are not visible in this listing.
289 (define_insn "sse2_movq128"
290 [(set (match_operand:V2DI 0 "register_operand" "=x")
293 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
294 (parallel [(const_int 0)]))
297 "%vmovq\t{%1, %0|%0, %1}"
298 [(set_attr "type" "ssemov")
299 (set_attr "prefix" "maybe_vex")
300 (set_attr "mode" "TI")])
302 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
303 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
304 ;; from memory, we'd prefer to load the memory directly into the %xmm
305 ;; register. To facilitate this happy circumstance, this pattern won't
306 ;; split until after register allocation. If the 64-bit value didn't
307 ;; come from memory, this is the best we can do. This is much better
308 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from that temporary.
;;
;; Enabled only for 32-bit targets with SSE2 and inter-unit moves.  The
;; split runs once reload_completed:
;;  - register source: each 32-bit half (SUBREG offsets 0 and 4) is loaded
;;    into an xmm via sse2_loadld (low half into operand 0, high half into
;;    the scratch operand 2), then the two are interleaved.
;;  - memory source: a single vec_concat of the DI value with zero.
;; The scratch is early-clobbered ("=&x") only in the register alternative.
311 (define_insn_and_split "movdi_to_sse"
313 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
314 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
315 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
316 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
318 "&& reload_completed"
321 if (register_operand (operands[1], DImode))
323 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
324 Assemble the 64-bit DImode value in an xmm register. */
325 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
326 gen_rtx_SUBREG (SImode, operands[1], 0)));
327 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
328 gen_rtx_SUBREG (SImode, operands[1], 4)));
329 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
332 else if (memory_operand (operands[1], DImode))
333 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
334 operands[1], const0_rtx));
;; Two post-reload rewrites of a vector load whose source satisfies
;; zero_extended_scalar_load_operand.  In both, operand 1 is narrowed to its
;; scalar element with simplify_gen_subreg and operand 2 is set to a zero
;; constant.
;; NOTE(review): the introducing "(define_split" lines are not visible in
;; this listing, and the V4SF replacement RTL is only partly shown.
;;
;; V4SF variant (TARGET_SSE): operand 2 is a V4SFmode zero vector.
340 [(set (match_operand:V4SF 0 "register_operand" "")
341 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
342 "TARGET_SSE && reload_completed"
345 (vec_duplicate:V4SF (match_dup 1))
349 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
350 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF variant (TARGET_SSE2): rewritten as a vec_concat of the scalar with
;; a DFmode zero.
354 [(set (match_operand:V2DF 0 "register_operand" "")
355 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
356 "TARGET_SSE2 && reload_completed"
357 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
359 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
360 operands[2] = CONST0_RTX (DFmode);
;; Push expander for the V16 modes: the register operand is handed to
;; ix86_expand_push.
;; NOTE(review): the enable condition is not visible in this listing.
363 (define_expand "push<mode>1"
364 [(match_operand:V16 0 "register_operand" "")]
367 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for the V16 modes; expansion is delegated to
;; ix86_expand_vector_move_misalign.
;; NOTE(review): the enable condition is not visible in this listing.
371 (define_expand "movmisalign<mode>"
372 [(set (match_operand:V16 0 "nonimmediate_operand" "")
373 (match_operand:V16 1 "nonimmediate_operand" ""))]
376 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for unaligned float-vector moves.  When both operands are
;; memory, the source is first forced into a register, since the matching
;; insn rejects memory-to-memory moves.
;; NOTE(review): the unspec wrapper and enable condition lines are not
;; visible in this listing.
380 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
381 [(set (match_operand:VF 0 "nonimmediate_operand" "")
383 [(match_operand:VF 1 "nonimmediate_operand" "")]
387 if (MEM_P (operands[0]) && MEM_P (operands[1]))
388 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Unaligned float-vector move: emits (v)movu<ssemodesuffix>.  Two
;; alternatives: register/memory -> register, and register -> memory.  The
;; condition excludes the memory-to-memory case.  The insn is tagged with
;; the "movu" attribute.
391 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
392 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
394 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
396 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
397 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
398 [(set_attr "type" "ssemov")
399 (set_attr "movu" "1")
400 (set_attr "prefix" "maybe_vex")
401 (set_attr "mode" "<MODE>")])
;; Expander for unaligned byte-vector (VI1) moves.  When both operands are
;; memory, the source is first forced into a register, since the matching
;; insn rejects memory-to-memory moves.
;; NOTE(review): the unspec name and enable condition lines are not visible
;; in this listing.
403 (define_expand "<sse2>_movdqu<avxsizesuffix>"
404 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
405 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
409 if (MEM_P (operands[0]) && MEM_P (operands[1]))
410 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Unaligned byte-vector move: emits (v)movdqu.  Two alternatives:
;; register/memory -> register, and register -> memory; memory-to-memory is
;; excluded by the condition.  The prefix_data16 attribute is computed from
;; TARGET_AVX.
;; NOTE(review): the if_then_else arms of prefix_data16 are not visible in
;; this listing.
413 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
414 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
415 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
417 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
418 "%vmovdqu\t{%1, %0|%0, %1}"
419 [(set_attr "type" "ssemov")
420 (set_attr "movu" "1")
421 (set (attr "prefix_data16")
423 (ne (symbol_ref "TARGET_AVX") (const_int 0))
426 (set_attr "prefix" "maybe_vex")
427 (set_attr "mode" "<sseinsnmode>")])
;; Load-only unaligned byte-vector move: emits (v)lddqu from a memory
;; operand into a register.  Both prefix_data16 and prefix_rep are computed
;; from TARGET_AVX.
;; NOTE(review): the unspec name, enable condition and the attribute arms
;; are not visible in this listing.
429 (define_insn "<sse3>_lddqu<avxsizesuffix>"
430 [(set (match_operand:VI1 0 "register_operand" "=x")
431 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
434 "%vlddqu\t{%1, %0|%0, %1}"
435 [(set_attr "type" "ssemov")
436 (set_attr "movu" "1")
437 (set (attr "prefix_data16")
439 (ne (symbol_ref "TARGET_AVX") (const_int 0))
442 (set (attr "prefix_rep")
444 (ne (symbol_ref "TARGET_AVX") (const_int 0))
447 (set_attr "prefix" "maybe_vex")
448 (set_attr "mode" "<sseinsnmode>")])
;; Store of an SImode general register ("r") to memory via movnti.  There
;; is no VEX form in the template.
;; NOTE(review): the "mode" attribute is V2DF although the operands are
;; SImode -- confirm this is intentional.  The unspec name and enable
;; condition are not visible in this listing.
450 (define_insn "sse2_movntsi"
451 [(set (match_operand:SI 0 "memory_operand" "=m")
452 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
455 "movnti\t{%1, %0|%0, %1}"
456 [(set_attr "type" "ssemov")
457 (set_attr "prefix_data16" "0")
458 (set_attr "mode" "V2DF")])
;; Float-vector store from an xmm register to memory: emits
;; (v)movnt<ssemodesuffix>.
;; NOTE(review): the unspec name and enable condition are not visible in
;; this listing.
460 (define_insn "<sse>_movnt<mode>"
461 [(set (match_operand:VF 0 "memory_operand" "=m")
462 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
465 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
466 [(set_attr "type" "ssemov")
467 (set_attr "prefix" "maybe_vex")
468 (set_attr "mode" "<MODE>")])
;; DImode-element vector (VI8) store from an xmm register to memory: emits
;; (v)movntdq.  prefix_data16 is computed from TARGET_AVX.
;; NOTE(review): "type" is ssecvt here, whereas the sibling movnt patterns
;; use ssemov -- confirm this is intentional.  The unspec name and enable
;; condition are not visible in this listing.
470 (define_insn "<sse2>_movnt<mode>"
471 [(set (match_operand:VI8 0 "memory_operand" "=m")
472 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
475 "%vmovntdq\t{%1, %0|%0, %1}"
476 [(set_attr "type" "ssecvt")
477 (set (attr "prefix_data16")
479 (ne (symbol_ref "TARGET_AVX") (const_int 0))
482 (set_attr "prefix" "maybe_vex")
483 (set_attr "mode" "<sseinsnmode>")])
485 ; Expand patterns for non-temporal stores. At the moment, only those
486 ; that directly map to insns are defined; it would be possible to
487 ; define patterns for other modes that would expand to several insns.
489 ;; Modes handled by storent patterns.
;; Scalar SI needs TARGET_SSE2; scalar SF and DF need TARGET_SSE4A; the
;; 256-bit float vectors need TARGET_AVX; V2DF needs TARGET_SSE2; V4SF is
;; unconditional.
;; NOTE(review): one line of this list appears to be missing from this
;; listing (between the scalar and float-vector entries).
490 (define_mode_iterator STORENT_MODE
491 [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
493 (V8SF "TARGET_AVX") V4SF
494 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Expander for the storent<mode> pattern over STORENT_MODE: a set of a
;; memory operand from a register operand.  It has no visible C body.
;; NOTE(review): the unspec wrapper/name and closing lines are not visible
;; in this listing.
496 (define_expand "storent<mode>"
497 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
499 [(match_operand:STORENT_MODE 1 "register_operand" "")]
503 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
505 ;; Parallel floating point arithmetic
507 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary float-vector expander parameterized by <code>; expansion is done
;; entirely by ix86_expand_fp_absneg_operator, after which the expander is
;; DONE.
;; NOTE(review): the code iterator and enable condition lines are not
;; visible in this listing.
509 (define_expand "<code><mode>2"
510 [(set (match_operand:VF 0 "register_operand" "")
512 (match_operand:VF 1 "register_operand" "")))]
514 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Abs/neg on float vectors, represented as the operator (operand 3) applied
;; to operand 1 together with a (use) of operand 2.  The split rewrites it
;; as a bitwise operation on operands 1 and 2: XOR when operand 3 is NEG,
;; AND otherwise.  When operand 1 is a memory reference the two inputs are
;; swapped so that the memory operand comes second.  Alternatives 0-1 are
;; the non-AVX forms (one input tied to the destination); 2-3 are AVX.
;; NOTE(review): several lines of the C body (declarations, braces, the
;; final emit) are not visible in this listing.
516 (define_insn_and_split "*absneg<mode>2"
517 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
518 (match_operator:VF 3 "absneg_operator"
519 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
520 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
526 enum rtx_code absneg_op;
532 if (MEM_P (operands[1]))
533 op1 = operands[2], op2 = operands[1];
535 op1 = operands[1], op2 = operands[2];
540 if (rtx_equal_p (operands[0], operands[1]))
546 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
547 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
548 t = gen_rtx_SET (VOIDmode, operands[0], t);
552 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Float-vector add/subtract expander.  The operands are legitimized with
;; ix86_fixup_binary_operands_no_copy; the expander does not end in DONE,
;; so the RTL template is then emitted.
;; NOTE(review): the plusminus operator line and the enable condition are
;; not visible in this listing.
554 (define_expand "<plusminus_insn><mode>3"
555 [(set (match_operand:VF 0 "register_operand" "")
557 (match_operand:VF 1 "nonimmediate_operand" "")
558 (match_operand:VF 2 "nonimmediate_operand" "")))]
560 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Float-vector add/subtract.  Alternative 0 is the two-operand non-AVX
;; form (destination tied to operand 1, with the <comm> modifier);
;; alternative 1 is the three-operand VEX form.  Operand validity is checked
;; with ix86_binary_operator_ok.
562 (define_insn "*<plusminus_insn><mode>3"
563 [(set (match_operand:VF 0 "register_operand" "=x,x")
565 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
566 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
567 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
569 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
570 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
571 [(set_attr "isa" "noavx,avx")
572 (set_attr "type" "sseadd")
573 (set_attr "prefix" "orig,vex")
574 (set_attr "mode" "<MODE>")])
;; Scalar add/subtract on 128-bit float vectors: emits the "ss"/"sd"
;; mnemonic chosen by <ssescalarmodesuffix>, and the "mode" attribute is the
;; element mode.  Alternative 0 is the non-AVX form (destination tied to
;; operand 1); alternative 1 is the VEX form.
;; NOTE(review): the vec_merge wrapper lines and the enable condition are
;; not visible in this listing.
576 (define_insn "<sse>_vm<plusminus_insn><mode>3"
577 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
580 (match_operand:VF_128 1 "register_operand" "0,x")
581 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
586 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
587 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
588 [(set_attr "isa" "noavx,avx")
589 (set_attr "type" "sseadd")
590 (set_attr "prefix" "orig,vex")
591 (set_attr "mode" "<ssescalarmode>")])
;; Float-vector multiply expander.  The operands are legitimized with
;; ix86_fixup_binary_operands_no_copy (MULT); the RTL template is then
;; emitted.
;; NOTE(review): the mult operator line and the enable condition are not
;; visible in this listing.
593 (define_expand "mul<mode>3"
594 [(set (match_operand:VF 0 "register_operand" "")
596 (match_operand:VF 1 "nonimmediate_operand" "")
597 (match_operand:VF 2 "nonimmediate_operand" "")))]
599 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Float-vector multiply.  Operand 1 carries the "%" commutativity
;; modifier.  Alternative 0 is the two-operand non-AVX form (destination
;; tied to operand 1); alternative 1 is the three-operand VEX form.
601 (define_insn "*mul<mode>3"
602 [(set (match_operand:VF 0 "register_operand" "=x,x")
604 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
605 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
606 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
608 mul<ssemodesuffix>\t{%2, %0|%0, %2}
609 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
610 [(set_attr "isa" "noavx,avx")
611 (set_attr "type" "ssemul")
612 (set_attr "prefix" "orig,vex")
613 (set_attr "mode" "<MODE>")])
;; Scalar multiply on 128-bit float vectors: emits mulss/mulsd (or the
;; VEX forms) according to <ssescalarmodesuffix>.  Alternative 0 ties the
;; destination to operand 1; alternative 1 is the VEX form.
;; NOTE(review): the vec_merge wrapper lines and the enable condition are
;; not visible in this listing.
615 (define_insn "<sse>_vmmul<mode>3"
616 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
619 (match_operand:VF_128 1 "register_operand" "0,x")
620 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
625 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
626 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
627 [(set_attr "isa" "noavx,avx")
628 (set_attr "type" "ssemul")
629 (set_attr "prefix" "orig,vex")
630 (set_attr "mode" "<ssescalarmode>")])
;; Double-precision vector divide expander (VF2 modes).  It only
;; legitimizes the operands with ix86_fixup_binary_operands_no_copy; the
;; div RTL template is then emitted.
;; NOTE(review): the enable condition is not visible in this listing.
632 (define_expand "div<mode>3"
633 [(set (match_operand:VF2 0 "register_operand" "")
634 (div:VF2 (match_operand:VF2 1 "register_operand" "")
635 (match_operand:VF2 2 "nonimmediate_operand" "")))]
637 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Single-precision vector divide expander (VF1 modes).  After operand
;; fix-up, a software division sequence is emitted through
;; ix86_emit_swdivsf when all of the following hold: TARGET_SSE_MATH,
;; TARGET_RECIP, not optimizing the insn for size, finite-math-only,
;; no trapping math, and unsafe math optimizations enabled.
;; NOTE(review): the enable condition, braces and the DONE after the
;; software path are not visible in this listing.
639 (define_expand "div<mode>3"
640 [(set (match_operand:VF1 0 "register_operand" "")
641 (div:VF1 (match_operand:VF1 1 "register_operand" "")
642 (match_operand:VF1 2 "nonimmediate_operand" "")))]
645 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
647 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
648 && flag_finite_math_only && !flag_trapping_math
649 && flag_unsafe_math_optimizations)
651 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Float-vector divide.  Operand 1 must be a register (no commutativity
;; modifier).  Alternative 0 is the two-operand non-AVX form; alternative 1
;; is the three-operand VEX form.
;; NOTE(review): the div operator line and the enable condition are not
;; visible in this listing.
656 (define_insn "<sse>_div<mode>3"
657 [(set (match_operand:VF 0 "register_operand" "=x,x")
659 (match_operand:VF 1 "register_operand" "0,x")
660 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
663 div<ssemodesuffix>\t{%2, %0|%0, %2}
664 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
665 [(set_attr "isa" "noavx,avx")
666 (set_attr "type" "ssediv")
667 (set_attr "prefix" "orig,vex")
668 (set_attr "mode" "<MODE>")])
;; Scalar divide on 128-bit float vectors: emits divss/divsd (or the VEX
;; forms) according to <ssescalarmodesuffix>.  Alternative 0 ties the
;; destination to operand 1; alternative 1 is the VEX form.
;; NOTE(review): the vec_merge wrapper lines and the enable condition are
;; not visible in this listing.
670 (define_insn "<sse>_vmdiv<mode>3"
671 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
674 (match_operand:VF_128 1 "register_operand" "0,x")
675 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
680 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
681 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
682 [(set_attr "isa" "noavx,avx")
683 (set_attr "type" "ssediv")
684 (set_attr "prefix" "orig,vex")
685 (set_attr "mode" "<ssescalarmode>")])
;; Packed single-precision UNSPEC_RCP: emits (v)rcpps for the VF1 modes.
;; The source may be a register or memory.
;; NOTE(review): the enable condition is not visible in this listing.
687 (define_insn "<sse>_rcp<mode>2"
688 [(set (match_operand:VF1 0 "register_operand" "=x")
690 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
692 "%vrcpps\t{%1, %0|%0, %1}"
693 [(set_attr "type" "sse")
694 (set_attr "atom_sse_attr" "rcp")
695 (set_attr "prefix" "maybe_vex")
696 (set_attr "mode" "<MODE>")])
;; Scalar single-precision reciprocal on V4SF: emits rcpss (non-AVX,
;; operand 2 tied to the destination) or vrcpss (VEX, operand 2 passed as
;; the extra source).  Operand 1 is the value the instruction operates on.
;; NOTE(review): the vec_merge wrapper, unspec name and enable condition
;; are not visible in this listing.
698 (define_insn "sse_vmrcpv4sf2"
699 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
701 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
703 (match_operand:V4SF 2 "register_operand" "0,x")
707 rcpss\t{%1, %0|%0, %1}
708 vrcpss\t{%1, %2, %0|%0, %2, %1}"
709 [(set_attr "isa" "noavx,avx")
710 (set_attr "type" "sse")
711 (set_attr "atom_sse_attr" "rcp")
712 (set_attr "prefix" "orig,vex")
713 (set_attr "mode" "SF")])
;; Double-precision vector square-root expander (VF2 modes).  No C body is
;; visible, so the sqrt RTL template is emitted as written.
;; NOTE(review): the enable condition and closing parenthesis are not
;; visible in this listing.
715 (define_expand "sqrt<mode>2"
716 [(set (match_operand:VF2 0 "register_operand" "")
717 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
;; Single-precision vector square-root expander (VF1 modes).  A software
;; sequence is emitted through ix86_emit_swsqrtsf (last argument false)
;; when all of the following hold: TARGET_SSE_MATH, TARGET_RECIP, not
;; optimizing the insn for size, finite-math-only, no trapping math, and
;; unsafe math optimizations enabled.
;; NOTE(review): the enable condition, braces and the DONE after the
;; software path are not visible in this listing.
720 (define_expand "sqrt<mode>2"
721 [(set (match_operand:VF1 0 "register_operand" "")
722 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
725 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
726 && flag_finite_math_only && !flag_trapping_math
727 && flag_unsafe_math_optimizations)
729 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Float-vector square root: emits (v)sqrt<ssemodesuffix>.  The source may
;; be a register or memory.
;; NOTE(review): the enable condition is not visible in this listing.
734 (define_insn "<sse>_sqrt<mode>2"
735 [(set (match_operand:VF 0 "register_operand" "=x")
736 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
738 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
739 [(set_attr "type" "sse")
740 (set_attr "atom_sse_attr" "sqrt")
741 (set_attr "prefix" "maybe_vex")
742 (set_attr "mode" "<MODE>")])
;; Scalar square root on 128-bit float vectors: emits sqrtss/sqrtsd
;; (non-AVX, operand 2 tied to the destination) or the VEX form with
;; operand 2 as the extra source.  Operand 1 is the value operated on.
;; NOTE(review): the vec_merge/sqrt wrapper lines and the enable condition
;; are not visible in this listing.
744 (define_insn "<sse>_vmsqrt<mode>2"
745 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
748 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
749 (match_operand:VF_128 2 "register_operand" "0,x")
753 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
754 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
755 [(set_attr "isa" "noavx,avx")
756 (set_attr "type" "sse")
757 (set_attr "atom_sse_attr" "sqrt")
758 (set_attr "prefix" "orig,vex")
759 (set_attr "mode" "<ssescalarmode>")])
;; Single-precision vector UNSPEC_RSQRT expander (VF1 modes).  Expansion is
;; delegated to ix86_emit_swsqrtsf with its last argument true.
;; NOTE(review): the enable condition and the DONE are not visible in this
;; listing.
761 (define_expand "rsqrt<mode>2"
762 [(set (match_operand:VF1 0 "register_operand" "")
764 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
767 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Packed single-precision UNSPEC_RSQRT: emits (v)rsqrtps for the VF1
;; modes.  The source may be a register or memory.
;; NOTE(review): the enable condition is not visible in this listing.
771 (define_insn "<sse>_rsqrt<mode>2"
772 [(set (match_operand:VF1 0 "register_operand" "=x")
774 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
776 "%vrsqrtps\t{%1, %0|%0, %1}"
777 [(set_attr "type" "sse")
778 (set_attr "prefix" "maybe_vex")
779 (set_attr "mode" "<MODE>")])
;; Scalar single-precision reciprocal square root on V4SF: emits rsqrtss
;; (non-AVX, operand 2 tied to the destination) or vrsqrtss (VEX, operand 2
;; passed as the extra source).
;; NOTE(review): the vec_merge wrapper, unspec name and enable condition
;; are not visible in this listing.
781 (define_insn "sse_vmrsqrtv4sf2"
782 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
784 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
786 (match_operand:V4SF 2 "register_operand" "0,x")
790 rsqrtss\t{%1, %0|%0, %1}
791 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
792 [(set_attr "isa" "noavx,avx")
793 (set_attr "type" "sse")
794 (set_attr "prefix" "orig,vex")
795 (set_attr "mode" "SF")])
797 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
798 ;; isn't really correct, as those rtl operators aren't defined when
799 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Float-vector min/max expander.  Without finite-math-only, operand 1 is
;; forced into a register first (the non-finite insn requires a register
;; there); the operands are then legitimized with
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the operator line, enable condition and braces are not
;; visible in this listing.
801 (define_expand "<code><mode>3"
802 [(set (match_operand:VF 0 "register_operand" "")
804 (match_operand:VF 1 "nonimmediate_operand" "")
805 (match_operand:VF 2 "nonimmediate_operand" "")))]
808 if (!flag_finite_math_only)
809 operands[1] = force_reg (<MODE>mode, operands[1]);
810 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Float-vector min/max used only under flag_finite_math_only.  Operand 1
;; carries the "%" commutativity modifier and may be memory.  Alternative 0
;; is the two-operand non-AVX form; alternative 1 is the VEX form.
813 (define_insn "*<code><mode>3_finite"
814 [(set (match_operand:VF 0 "register_operand" "=x,x")
816 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
817 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
818 "TARGET_SSE && flag_finite_math_only
819 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
821 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
822 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
823 [(set_attr "isa" "noavx,avx")
824 (set_attr "type" "sseadd")
825 (set_attr "prefix" "orig,vex")
826 (set_attr "mode" "<MODE>")])
;; Float-vector min/max used when flag_finite_math_only is off.  Unlike the
;; _finite variant, operand 1 must be a register and has no "%"
;; commutativity modifier, so the operand order is preserved.
828 (define_insn "*<code><mode>3"
829 [(set (match_operand:VF 0 "register_operand" "=x,x")
831 (match_operand:VF 1 "register_operand" "0,x")
832 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
833 "TARGET_SSE && !flag_finite_math_only"
835 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
836 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
837 [(set_attr "isa" "noavx,avx")
838 (set_attr "type" "sseadd")
839 (set_attr "prefix" "orig,vex")
840 (set_attr "mode" "<MODE>")])
;; Scalar min/max on 128-bit float vectors: emits the "ss"/"sd" form of
;; <maxmin_float>.  Alternative 0 ties the destination to operand 1;
;; alternative 1 is the VEX form.
;; NOTE(review): "type" is sse here while the packed min/max patterns use
;; sseadd -- confirm this is intentional.  The vec_merge wrapper lines and
;; enable condition are not visible in this listing.
842 (define_insn "<sse>_vm<code><mode>3"
843 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
846 (match_operand:VF_128 1 "register_operand" "0,x")
847 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
852 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
853 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
854 [(set_attr "isa" "noavx,avx")
855 (set_attr "type" "sse")
856 (set_attr "prefix" "orig,vex")
857 (set_attr "mode" "<ssescalarmode>")])
859 ;; These versions of the min/max patterns implement exactly the operations
860 ;; min = (op1 < op2 ? op1 : op2)
861 ;; max = (!(op1 < op2) ? op1 : op2)
862 ;; Their operands are not commutative, and thus they may be used in the
863 ;; presence of -0.0 and NaN.
;; Order-preserving float-vector minimum: operand 1 is a register with no
;; commutativity modifier.  Emits min<ssemodesuffix> (non-AVX) or the VEX
;; form.
;; NOTE(review): the unspec wrapper/name and enable condition are not
;; visible in this listing.
865 (define_insn "*ieee_smin<mode>3"
866 [(set (match_operand:VF 0 "register_operand" "=x,x")
868 [(match_operand:VF 1 "register_operand" "0,x")
869 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
873 min<ssemodesuffix>\t{%2, %0|%0, %2}
874 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
875 [(set_attr "isa" "noavx,avx")
876 (set_attr "type" "sseadd")
877 (set_attr "prefix" "orig,vex")
878 (set_attr "mode" "<MODE>")])
;; Order-preserving float-vector maximum: operand 1 is a register with no
;; commutativity modifier.  Emits max<ssemodesuffix> (non-AVX) or the VEX
;; form.
;; NOTE(review): the unspec wrapper/name and enable condition are not
;; visible in this listing.
880 (define_insn "*ieee_smax<mode>3"
881 [(set (match_operand:VF 0 "register_operand" "=x,x")
883 [(match_operand:VF 1 "register_operand" "0,x")
884 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
888 max<ssemodesuffix>\t{%2, %0|%0, %2}
889 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
890 [(set_attr "isa" "noavx,avx")
891 (set_attr "type" "sseadd")
892 (set_attr "prefix" "orig,vex")
893 (set_attr "mode" "<MODE>")])
;; 256-bit double-precision add/subtract: emits vaddsubpd.  The visible
;; RTL combines an operation on operands 1 and 2 with (minus op1 op2) on the
;; same operands.  VEX-only, single alternative.
;; NOTE(review): the vec_merge/plus lines, the merge mask and the enable
;; condition are not visible in this listing.
895 (define_insn "avx_addsubv4df3"
896 [(set (match_operand:V4DF 0 "register_operand" "=x")
899 (match_operand:V4DF 1 "register_operand" "x")
900 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
901 (minus:V4DF (match_dup 1) (match_dup 2))
904 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
905 [(set_attr "type" "sseadd")
906 (set_attr "prefix" "vex")
907 (set_attr "mode" "V4DF")])
;; 128-bit double-precision add/subtract: emits addsubpd (non-AVX,
;; destination tied to operand 1) or vaddsubpd (VEX).
;; NOTE(review): the vec_merge/plus lines, the merge mask and the enable
;; condition are not visible in this listing.
909 (define_insn "sse3_addsubv2df3"
910 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
913 (match_operand:V2DF 1 "register_operand" "0,x")
914 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
915 (minus:V2DF (match_dup 1) (match_dup 2))
919 addsubpd\t{%2, %0|%0, %2}
920 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
921 [(set_attr "isa" "noavx,avx")
922 (set_attr "type" "sseadd")
923 (set_attr "atom_unit" "complex")
924 (set_attr "prefix" "orig,vex")
925 (set_attr "mode" "V2DF")])
;; 256-bit single-precision add/subtract: emits vaddsubps.  VEX-only,
;; single alternative.
;; NOTE(review): the vec_merge/plus lines, the merge mask and the enable
;; condition are not visible in this listing.
927 (define_insn "avx_addsubv8sf3"
928 [(set (match_operand:V8SF 0 "register_operand" "=x")
931 (match_operand:V8SF 1 "register_operand" "x")
932 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
933 (minus:V8SF (match_dup 1) (match_dup 2))
936 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
937 [(set_attr "type" "sseadd")
938 (set_attr "prefix" "vex")
939 (set_attr "mode" "V8SF")])
;; 128-bit single-precision add/subtract: emits addsubps (non-AVX,
;; destination tied to operand 1) or vaddsubps (VEX).  prefix_rep is "1"
;; for the non-AVX alternative only.
;; NOTE(review): the vec_merge/plus lines, the merge mask and the enable
;; condition are not visible in this listing.
941 (define_insn "sse3_addsubv4sf3"
942 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
945 (match_operand:V4SF 1 "register_operand" "0,x")
946 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
947 (minus:V4SF (match_dup 1) (match_dup 2))
951 addsubps\t{%2, %0|%0, %2}
952 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
953 [(set_attr "isa" "noavx,avx")
954 (set_attr "type" "sseadd")
955 (set_attr "prefix" "orig,vex")
956 (set_attr "prefix_rep" "1,*")
957 (set_attr "mode" "V4SF")])
;; 256-bit double-precision horizontal add/subtract: emits vhaddpd or
;; vhsubpd.  The visible RTL pairs elements (0,1) and (2,3) of operand 1,
;; followed by the same element pairs of operand 2.
;; NOTE(review): the vec_concat/plusminus wrapper lines and the enable
;; condition are not visible in this listing, so the exact placement of the
;; four results in the destination cannot be confirmed from here.
959 (define_insn "avx_h<plusminus_insn>v4df3"
960 [(set (match_operand:V4DF 0 "register_operand" "=x")
965 (match_operand:V4DF 1 "register_operand" "x")
966 (parallel [(const_int 0)]))
967 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
969 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
970 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
974 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
975 (parallel [(const_int 0)]))
976 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
978 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
979 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
981 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
982 [(set_attr "type" "sseadd")
983 (set_attr "prefix" "vex")
984 (set_attr "mode" "V4DF")])
;; 128-bit double-precision horizontal add/subtract: emits haddpd/hsubpd
;; (non-AVX, destination tied to operand 1) or the VEX form.  The visible
;; RTL pairs elements 0 and 1 of operand 1, then elements 0 and 1 of
;; operand 2.
;; NOTE(review): the vec_concat/plusminus wrapper lines and the enable
;; condition are not visible in this listing.
986 (define_insn "sse3_h<plusminus_insn>v2df3"
987 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
991 (match_operand:V2DF 1 "register_operand" "0,x")
992 (parallel [(const_int 0)]))
993 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
996 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
997 (parallel [(const_int 0)]))
998 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1001 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1002 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1003 [(set_attr "isa" "noavx,avx")
1004 (set_attr "type" "sseadd")
1005 (set_attr "prefix" "orig,vex")
1006 (set_attr "mode" "V2DF")])
;; 256-bit single-precision horizontal add/subtract: emits vhaddps or
;; vhsubps.  The visible RTL pairs adjacent elements in this order:
;; operand 1 (0,1),(2,3); operand 2 (0,1),(2,3); operand 1 (4,5),(6,7);
;; operand 2 (4,5),(6,7) -- i.e. the low halves of both inputs are handled
;; before the high halves.
;; NOTE(review): the vec_concat/plusminus wrapper lines and the enable
;; condition are not visible in this listing.
1008 (define_insn "avx_h<plusminus_insn>v8sf3"
1009 [(set (match_operand:V8SF 0 "register_operand" "=x")
1015 (match_operand:V8SF 1 "register_operand" "x")
1016 (parallel [(const_int 0)]))
1017 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1019 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1020 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1024 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1025 (parallel [(const_int 0)]))
1026 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1028 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1029 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1033 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1034 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1036 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1037 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1040 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1041 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1043 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1044 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1046 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1047 [(set_attr "type" "sseadd")
1048 (set_attr "prefix" "vex")
1049 (set_attr "mode" "V8SF")])
;; 128-bit single-precision horizontal add/subtract: emits haddps/hsubps
;; (non-AVX, destination tied to operand 1) or the VEX form.  The visible
;; RTL pairs elements (0,1) and (2,3) of operand 1, then the same pairs of
;; operand 2.  prefix_rep is "1" for the non-AVX alternative only.
;; NOTE(review): the vec_concat/plusminus wrapper lines and the enable
;; condition are not visible in this listing.
1051 (define_insn "sse3_h<plusminus_insn>v4sf3"
1052 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1057 (match_operand:V4SF 1 "register_operand" "0,x")
1058 (parallel [(const_int 0)]))
1059 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1061 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1062 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1066 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1067 (parallel [(const_int 0)]))
1068 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1070 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1071 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1074 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1075 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1076 [(set_attr "isa" "noavx,avx")
1077 (set_attr "type" "sseadd")
1078 (set_attr "atom_unit" "complex")
1079 (set_attr "prefix" "orig,vex")
1080 (set_attr "prefix_rep" "1,*")
1081 (set_attr "mode" "V4SF")])
;; Sum reduction of a V4DF vector, emitted as three insns:
;;   tmp  = hadd (op1, op1)
;;   tmp2 = vperm2f128 (tmp, tmp, imm 1)
;;   op0  = tmp + tmp2
;; NOTE(review): the enable condition, braces and DONE are not visible in
;; this listing.
1083 (define_expand "reduc_splus_v4df"
1084 [(match_operand:V4DF 0 "register_operand" "")
1085 (match_operand:V4DF 1 "register_operand" "")]
1088 rtx tmp = gen_reg_rtx (V4DFmode);
1089 rtx tmp2 = gen_reg_rtx (V4DFmode);
1090 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1091 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1092 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Expand a V2DF sum reduction as one SSE3 horizontal add of the input
;; with itself, written directly to operand 0.
1096 (define_expand "reduc_splus_v2df"
1097 [(match_operand:V2DF 0 "register_operand" "")
1098 (match_operand:V2DF 1 "register_operand" "")]
1101 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Expand a V8SF sum reduction: two successive horizontal adds, then a
;; vperm2f128 of the second result with selector 1 (reusing tmp), and a
;; final vector add of tmp and tmp2 into operand 0.
1105 (define_expand "reduc_splus_v8sf"
1106 [(match_operand:V8SF 0 "register_operand" "")
1107 (match_operand:V8SF 1 "register_operand" "")]
1110 rtx tmp = gen_reg_rtx (V8SFmode);
1111 rtx tmp2 = gen_reg_rtx (V8SFmode);
1112 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1113 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1114 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1115 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Expand a V4SF sum reduction.  Two strategies appear in the body: a pair
;; of SSE3 horizontal adds through a temporary, or the generic helper
;; ix86_expand_reduc_v4sf driven by gen_addv4sf3.
;; NOTE(review): the choice between them presumably depends on SSE3
;; availability -- confirm against the guarding condition.
1119 (define_expand "reduc_splus_v4sf"
1120 [(match_operand:V4SF 0 "register_operand" "")
1121 (match_operand:V4SF 1 "register_operand" "")]
1126 rtx tmp = gen_reg_rtx (V4SFmode);
1127 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1128 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1131 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Expand a V4SF maximum reduction by handing gen_smaxv4sf3 to the generic
;; helper ix86_expand_reduc_v4sf.
1136 (define_expand "reduc_smax_v4sf"
1137 [(match_operand:V4SF 0 "register_operand" "")
1138 (match_operand:V4SF 1 "register_operand" "")]
1141 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Expand a V4SF minimum reduction by handing gen_sminv4sf3 to the generic
;; helper ix86_expand_reduc_v4sf.
1145 (define_expand "reduc_smin_v4sf"
1146 [(match_operand:V4SF 0 "register_operand" "")
1147 (match_operand:V4SF 1 "register_operand" "")]
1150 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1154 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1156 ;; Parallel floating point comparisons
1158 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed floating-point compare over the VF modes.  Operand 3 is the
;; comparison predicate, an immediate accepted by const_0_to_31_operand,
;; and is emitted as an explicit operand of vcmp<ssemodesuffix>.
1160 (define_insn "avx_cmp<mode>3"
1161 [(set (match_operand:VF 0 "register_operand" "=x")
1163 [(match_operand:VF 1 "register_operand" "x")
1164 (match_operand:VF 2 "nonimmediate_operand" "xm")
1165 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1168 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1169 [(set_attr "type" "ssecmp")
1170 (set_attr "length_immediate" "1")
1171 (set_attr "prefix" "vex")
1172 (set_attr "mode" "<MODE>")])
;; Scalar-mnemonic counterpart of avx_cmp<mode>3 for the 128-bit float
;; modes: emits vcmp<ssescalarmodesuffix> with the predicate immediate in
;; operand 3 and reports the scalar mode in its "mode" attribute.
1174 (define_insn "avx_vmcmp<mode>3"
1175 [(set (match_operand:VF_128 0 "register_operand" "=x")
1178 [(match_operand:VF_128 1 "register_operand" "x")
1179 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1180 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1185 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1186 [(set_attr "type" "ssecmp")
1187 (set_attr "length_immediate" "1")
1188 (set_attr "prefix" "vex")
1189 (set_attr "mode" "<ssescalarmode>")])
;; Packed floating-point compare whose comparison is an RTL operator
;; (operand 3, accepted by sse_comparison_operator) rather than an
;; immediate.  The %D3 modifier prints that operator into the mnemonic.
;; Alternative 0 is the two-operand SSE form, alternative 1 the VEX form.
1191 (define_insn "<sse>_maskcmp<mode>3"
1192 [(set (match_operand:VF 0 "register_operand" "=x,x")
1193 (match_operator:VF 3 "sse_comparison_operator"
1194 [(match_operand:VF 1 "register_operand" "0,x")
1195 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1198 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1199 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1200 [(set_attr "isa" "noavx,avx")
1201 (set_attr "type" "ssecmp")
1202 (set_attr "length_immediate" "1")
1203 (set_attr "prefix" "orig,vex")
1204 (set_attr "mode" "<MODE>")])
;; Scalar-mnemonic variant of <sse>_maskcmp<mode>3 for the 128-bit float
;; modes; the comparison operator (operand 3) is printed via %D3.
;; NOTE(review): length_immediate is "1,*" here whereas the packed
;; <sse>_maskcmp<mode>3 pattern uses "1" for both alternatives -- confirm
;; the VEX alternative's immediate byte is still accounted for.
1206 (define_insn "<sse>_vmmaskcmp<mode>3"
1207 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1209 (match_operator:VF_128 3 "sse_comparison_operator"
1210 [(match_operand:VF_128 1 "register_operand" "0,x")
1211 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1216 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1217 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1218 [(set_attr "isa" "noavx,avx")
1219 (set_attr "type" "ssecmp")
1220 (set_attr "length_immediate" "1,*")
1221 (set_attr "prefix" "orig,vex")
1222 (set_attr "mode" "<ssescalarmode>")])
;; comiss/comisd: compares element 0 of the two vector operands and sets
;; FLAGS_REG in CCFP mode.  prefix_data16 is chosen by the "mode"
;; attribute (DF versus everything else).
1224 (define_insn "<sse>_comi"
1225 [(set (reg:CCFP FLAGS_REG)
1228 (match_operand:<ssevecmode> 0 "register_operand" "x")
1229 (parallel [(const_int 0)]))
1231 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1232 (parallel [(const_int 0)]))))]
1233 "SSE_FLOAT_MODE_P (<MODE>mode)"
1234 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1235 [(set_attr "type" "ssecomi")
1236 (set_attr "prefix" "maybe_vex")
1237 (set_attr "prefix_rep" "0")
1238 (set (attr "prefix_data16")
1239 (if_then_else (eq_attr "mode" "DF")
1241 (const_string "0")))
1242 (set_attr "mode" "<MODE>")])
;; ucomiss/ucomisd: same shape as <sse>_comi, but the flags register is
;; set in CCFPU mode and the emitted mnemonic is ucomi<ssemodesuffix>.
1244 (define_insn "<sse>_ucomi"
1245 [(set (reg:CCFPU FLAGS_REG)
1248 (match_operand:<ssevecmode> 0 "register_operand" "x")
1249 (parallel [(const_int 0)]))
1251 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1252 (parallel [(const_int 0)]))))]
1253 "SSE_FLOAT_MODE_P (<MODE>mode)"
1254 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1255 [(set_attr "type" "ssecomi")
1256 (set_attr "prefix" "maybe_vex")
1257 (set_attr "prefix_rep" "0")
1258 (set (attr "prefix_data16")
1259 (if_then_else (eq_attr "mode" "DF")
1261 (const_string "0")))
1262 (set_attr "mode" "<MODE>")])
;; Vector conditional select for the VF modes: operand 3 is the comparison
;; applied to operands 4 and 5, operands 1 and 2 are the two value arms.
;; All the work is delegated to ix86_expand_fp_vcond, whose success flag
;; is captured in `ok'.
1264 (define_expand "vcond<mode>"
1265 [(set (match_operand:VF 0 "register_operand" "")
1267 (match_operator 3 ""
1268 [(match_operand:VF 4 "nonimmediate_operand" "")
1269 (match_operand:VF 5 "nonimmediate_operand" "")])
1270 (match_operand:VF 1 "general_operand" "")
1271 (match_operand:VF 2 "general_operand" "")))]
1274 bool ok = ix86_expand_fp_vcond (operands);
1279 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1281 ;; Parallel floating point logical operations
1283 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed floating-point and-not.  The mnemonic is assembled at output
;; time with snprintf into a static buffer: the suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, else <ssemodesuffix>.
;; which_alternative picks the two-operand SSE template or the
;; three-operand VEX template.
1285 (define_insn "<sse>_andnot<mode>3"
1286 [(set (match_operand:VF 0 "register_operand" "=x,x")
1289 (match_operand:VF 1 "register_operand" "0,x"))
1290 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1293 static char buf[32];
1296 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1298 switch (which_alternative)
1301 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1304 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1310 snprintf (buf, sizeof (buf), insn, suffix);
1313 [(set_attr "isa" "noavx,avx")
1314 (set_attr "type" "sselog")
1315 (set_attr "prefix" "orig,vex")
1316 (set_attr "mode" "<MODE>")])
;; Named expander for the <code> logical operations on the VF modes.  The
;; preparation statement only calls ix86_fixup_binary_operands_no_copy to
;; legitimize the operands.
1318 (define_expand "<code><mode>3"
1319 [(set (match_operand:VF 0 "register_operand" "")
1321 (match_operand:VF 1 "nonimmediate_operand" "")
1322 (match_operand:VF 2 "nonimmediate_operand" "")))]
1324 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed floating-point <logic> instruction for the VF modes, valid when
;; ix86_binary_operator_ok accepts the operands.  The mnemonic is built
;; with snprintf: suffix "ps" under TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL,
;; otherwise <ssemodesuffix>; which_alternative selects the SSE or VEX
;; operand template.
1326 (define_insn "*<code><mode>3"
1327 [(set (match_operand:VF 0 "register_operand" "=x,x")
1329 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1330 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1331 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1333 static char buf[32];
1336 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1338 switch (which_alternative)
1341 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1344 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1350 snprintf (buf, sizeof (buf), insn, suffix);
1353 [(set_attr "isa" "noavx,avx")
1354 (set_attr "type" "sselog")
1355 (set_attr "prefix" "orig,vex")
1356 (set_attr "mode" "<MODE>")])
;; Vector copysign.  Operand 3 is the sign-bit mask built by
;; ix86_build_signbit_mask; operands 4 and 5 are fresh temporaries.
;; Operand 1 is combined with the complement of the mask, operand 2 with
;; the mask itself, and the result is the inclusive-or of operands 4 and 5.
;; NOTE(review): the two partial results presumably land in operands 4
;; and 5 respectively -- confirm against the full pattern.
1358 (define_expand "copysign<mode>3"
1361 (not:VF (match_dup 3))
1362 (match_operand:VF 1 "nonimmediate_operand" "")))
1364 (and:VF (match_dup 3)
1365 (match_operand:VF 2 "nonimmediate_operand" "")))
1366 (set (match_operand:VF 0 "register_operand" "")
1367 (ior:VF (match_dup 4) (match_dup 5)))]
1370 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1372 operands[4] = gen_reg_rtx (<MODE>mode);
1373 operands[5] = gen_reg_rtx (<MODE>mode);
1376 ;; Also define scalar versions. These are used for abs, neg, and
1377 ;; conditional move. Using subregs into vector modes causes register
1378 ;; allocation lossage. These patterns do not allow memory operands
1379 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not implemented with the packed instruction on
;; register-only operands.  The mnemonic suffix is "ps" under
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, otherwise <ssevecmodesuffix>;
;; the "mode" attribute reports the containing vector mode.
1381 (define_insn "*andnot<mode>3"
1382 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1385 (match_operand:MODEF 1 "register_operand" "0,x"))
1386 (match_operand:MODEF 2 "register_operand" "x,x")))]
1387 "SSE_FLOAT_MODE_P (<MODE>mode)"
1389 static char buf[32];
1392 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1394 switch (which_alternative)
1397 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1400 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1406 snprintf (buf, sizeof (buf), insn, suffix);
1409 [(set_attr "isa" "noavx,avx")
1410 (set_attr "type" "sselog")
1411 (set_attr "prefix" "orig,vex")
1412 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) <logic> operation implemented with the packed
;; instruction on register-only operands.  Mnemonic construction mirrors
;; the vector pattern: "ps" under TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL,
;; otherwise <ssevecmodesuffix>, with SSE and VEX operand templates.
1414 (define_insn "*<code><mode>3"
1415 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1417 (match_operand:MODEF 1 "register_operand" "%0,x")
1418 (match_operand:MODEF 2 "register_operand" "x,x")))]
1419 "SSE_FLOAT_MODE_P (<MODE>mode)"
1421 static char buf[32];
1424 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1426 switch (which_alternative)
1429 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1432 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1438 snprintf (buf, sizeof (buf), insn, suffix);
1441 [(set_attr "isa" "noavx,avx")
1442 (set_attr "type" "sselog")
1443 (set_attr "prefix" "orig,vex")
1444 (set_attr "mode" "<ssevecmode>")])
1446 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1448 ;; FMA4 floating point multiply/accumulate instructions. This
1449 ;; includes the scalar version of the instructions as well as the
1452 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1454 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1455 ;; combine to generate a multiply/add with two memory references. We then
1456 ;; split this insn, into loading up the destination register with one of the
1457 ;; memory operations. If we don't manage to split the insn, reload will
1458 ;; generate the appropriate moves. The reason this is needed, is that combine
1459 ;; has already folded one of the memory references into both the multiply and
1460 ;; add insns, and it can't generate a new pseudo. I.e.:
1461 ;; (set (reg1) (mem (addr1)))
1462 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1463 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1465 ;; ??? This is historic, pre-dating the gimple fma transformation.
1466 ;; We could now properly represent that only one memory operand is
1467 ;; allowed and not be penalized during optimization.
1469 ;; Intrinsic FMA operations.
1471 ;; The standard names for fma are only available with SSE math enabled.
;; Standard-name fused multiply-add expander over FMAMODE.  Enabled when
;; either FMA or FMA4 is available and SSE math is in use.
1472 (define_expand "fma<mode>4"
1473 [(set (match_operand:FMAMODE 0 "register_operand")
1475 (match_operand:FMAMODE 1 "nonimmediate_operand")
1476 (match_operand:FMAMODE 2 "nonimmediate_operand")
1477 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1478 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name fused multiply-subtract expander: same as fma<mode>4 but
;; with operand 3 (the addend) negated.
1480 (define_expand "fms<mode>4"
1481 [(set (match_operand:FMAMODE 0 "register_operand")
1483 (match_operand:FMAMODE 1 "nonimmediate_operand")
1484 (match_operand:FMAMODE 2 "nonimmediate_operand")
1485 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1486 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name negated fused multiply-add expander: operand 1 (a
;; multiplicand) is negated, the addend is not.
1488 (define_expand "fnma<mode>4"
1489 [(set (match_operand:FMAMODE 0 "register_operand")
1491 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1492 (match_operand:FMAMODE 2 "nonimmediate_operand")
1493 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1494 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name negated fused multiply-subtract expander: both operand 1
;; (a multiplicand) and operand 3 (the addend) are negated.
1496 (define_expand "fnms<mode>4"
1497 [(set (match_operand:FMAMODE 0 "register_operand")
1499 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1500 (match_operand:FMAMODE 2 "nonimmediate_operand")
1501 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1502 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1504 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; Intrinsic-facing multiply-add expander.  Its condition requires only
;; TARGET_FMA or TARGET_FMA4; unlike fma<mode>4 it does not test
;; TARGET_SSE_MATH.
1505 (define_expand "fma4i_fmadd_<mode>"
1506 [(set (match_operand:FMAMODE 0 "register_operand")
1508 (match_operand:FMAMODE 1 "nonimmediate_operand")
1509 (match_operand:FMAMODE 2 "nonimmediate_operand")
1510 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1511 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmadd<ssemodesuffix> with a separate destination.  The
;; two alternatives allow a memory operand in position 3 or in position 2,
;; never both; operands 1 and 2 are marked commutative.
1513 (define_insn "*fma4i_fmadd_<mode>"
1514 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1516 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1517 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1518 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1520 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1521 [(set_attr "type" "ssemuladd")
1522 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsub<ssemodesuffix>; operand layout and constraint
;; alternatives match *fma4i_fmadd_<mode>.
1524 (define_insn "*fma4i_fmsub_<mode>"
1525 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1527 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1528 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1530 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1532 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1533 [(set_attr "type" "ssemuladd")
1534 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmadd<ssemodesuffix>; operand layout and constraint
;; alternatives match *fma4i_fmadd_<mode>.
1536 (define_insn "*fma4i_fnmadd_<mode>"
1537 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1540 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1541 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1542 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1544 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1545 [(set_attr "type" "ssemuladd")
1546 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmsub<ssemodesuffix>; operand layout and constraint
;; alternatives match *fma4i_fmadd_<mode>.
1548 (define_insn "*fma4i_fnmsub_<mode>"
1549 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1552 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1553 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1555 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1557 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1558 [(set_attr "type" "ssemuladd")
1559 (set_attr "mode" "<MODE>")])
1561 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1562 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the scalar-in-vector multiply-add on the 128-bit float
;; modes.  The preparation statement supplies operand 4 as the all-zero
;; vector constant of the mode.
1564 (define_expand "fma4i_vmfmadd_<mode>"
1565 [(set (match_operand:VF_128 0 "register_operand")
1568 (match_operand:VF_128 1 "nonimmediate_operand")
1569 (match_operand:VF_128 2 "nonimmediate_operand")
1570 (match_operand:VF_128 3 "nonimmediate_operand"))
1575 operands[4] = CONST0_RTX (<MODE>mode);
;; Scalar four-operand vfmadd<ssescalarmodesuffix>.  Operand 4 must be the
;; zero constant (const0_operand) and does not appear in the assembly.
1578 (define_insn "*fma4i_vmfmadd_<mode>"
1579 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1582 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1583 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1584 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1585 (match_operand:VF_128 4 "const0_operand" "")
1588 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1589 [(set_attr "type" "ssemuladd")
1590 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfmsub<ssescalarmodesuffix>; operand 4 must be the
;; zero constant and does not appear in the assembly.
1592 (define_insn "*fma4i_vmfmsub_<mode>"
1593 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1596 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1597 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1599 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1600 (match_operand:VF_128 4 "const0_operand" "")
1603 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1604 [(set_attr "type" "ssemuladd")
1605 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfnmadd<ssescalarmodesuffix>; operand 4 must be the
;; zero constant and does not appear in the assembly.
1607 (define_insn "*fma4i_vmfnmadd_<mode>"
1608 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1612 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1613 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1614 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1615 (match_operand:VF_128 4 "const0_operand" "")
1618 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1619 [(set_attr "type" "ssemuladd")
1620 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfnmsub<ssescalarmodesuffix>; operand 4 must be the
;; zero constant and does not appear in the assembly.
1622 (define_insn "*fma4i_vmfnmsub_<mode>"
1623 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1627 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1628 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1630 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1631 (match_operand:VF_128 4 "const0_operand" "")
1634 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1635 [(set_attr "type" "ssemuladd")
1636 (set_attr "mode" "<MODE>")])
1638 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1640 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1642 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1644 ;; It would be possible to represent these without the UNSPEC as
1647 ;; (fma op1 op2 op3)
1648 ;; (fma op1 op2 (neg op3))
1651 ;; But this doesn't seem useful in practice.
;; Expander for the combined multiply add/subtract operation on the VF
;; modes, enabled when either TARGET_FMA or TARGET_FMA4 is set.
1653 (define_expand "fmaddsub_<mode>"
1654 [(set (match_operand:VF 0 "register_operand")
1656 [(match_operand:VF 1 "nonimmediate_operand")
1657 (match_operand:VF 2 "nonimmediate_operand")
1658 (match_operand:VF 3 "nonimmediate_operand")]
1660 "TARGET_FMA || TARGET_FMA4")
;; FMA4 four-operand vfmaddsub.  The pattern iterates over every VF mode,
;; so the mnemonic suffix must follow the mode (<ssemodesuffix>) rather
;; than being hard-coded to "ps", which would emit the single-precision
;; instruction for the V2DF/V4DF instances.  Memory is allowed in operand
;; 3 or operand 2, never both.
1662 (define_insn "*fma4_fmaddsub_<mode>"
1663 [(set (match_operand:VF 0 "register_operand" "=x,x")
1665 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1666 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1667 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
1670 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1671 [(set_attr "type" "ssemuladd")
1672 (set_attr "mode" "<MODE>")])
;; FMA4 four-operand vfmsubadd.  As with *fma4_fmaddsub_<mode>, the
;; mnemonic suffix is taken from the mode (<ssemodesuffix>) so that the
;; double-precision VF instances do not emit the "ps" instruction.
1674 (define_insn "*fma4_fmsubadd_<mode>"
1675 [(set (match_operand:VF 0 "register_operand" "=x,x")
1677 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1678 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1680 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
1683 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1684 [(set_attr "type" "ssemuladd")
1685 (set_attr "mode" "<MODE>")])
1687 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1689 ;; FMA3 floating point multiply/accumulate instructions.
1691 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FMA3 fused multiply-add.  The destination is tied to operand 1
;; (alternatives 0 and 1) or to operand 3 (alternative 2), and the
;; 132/213/231 form is chosen so that the result is op1 * op2 + op3.
;; Alternative 1 uses the 213 form (dest = src2 * dest + src3); "312" is
;; not an FMA3 encoding.
1693 (define_insn "*fma_fmadd_<mode>"
1694 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1696 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1697 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1698 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1701 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1702 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1703 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1704 [(set_attr "type" "ssemuladd")
1705 (set_attr "mode" "<MODE>")])
;; FMA3 fused multiply-subtract.  Same destination tying as
;; *fma_fmadd_<mode>; alternative 1 uses the 213 form ("312" is not an
;; FMA3 encoding).
1707 (define_insn "*fma_fmsub_<mode>"
1708 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1710 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1711 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1713 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1716 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1717 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1718 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1719 [(set_attr "type" "ssemuladd")
1720 (set_attr "mode" "<MODE>")])
;; FMA3 negated fused multiply-add (emits vfnmadd*).  Named
;; *fma_fnmadd_<mode> so that it no longer shares a name with the
;; non-negated pattern in dumps; the leading '*' means no generator
;; function depends on the name.  Alternative 1 uses the 213 form ("312"
;; is not an FMA3 encoding).
1722 (define_insn "*fma_fnmadd_<mode>"
1723 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1726 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1727 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1728 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1731 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1732 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1733 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1734 [(set_attr "type" "ssemuladd")
1735 (set_attr "mode" "<MODE>")])
;; FMA3 negated fused multiply-subtract (emits vfnmsub*).  Named
;; *fma_fnmsub_<mode> so that it no longer shares a name with the
;; non-negated pattern in dumps; the leading '*' means no generator
;; function depends on the name.  Alternative 1 uses the 213 form ("312"
;; is not an FMA3 encoding).
1737 (define_insn "*fma_fnmsub_<mode>"
1738 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1741 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1742 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1744 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1747 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1748 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1749 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1750 [(set_attr "type" "ssemuladd")
1751 (set_attr "mode" "<MODE>")])
;; FMA3 vfmaddsub over the VF modes.  The destination is tied to operand 1
;; (alternatives 0 and 1) or operand 3 (alternative 2), selecting the
;; 132, 213 or 231 form respectively.
1753 (define_insn "*fma_fmaddsub_<mode>"
1754 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
1756 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
1757 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
1758 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
1762 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1763 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1764 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1765 [(set_attr "type" "ssemuladd")
1766 (set_attr "mode" "<MODE>")])
;; FMA3 vfmsubadd over the VF modes; destination tying and form selection
;; (132 / 213 / 231) follow *fma_fmaddsub_<mode>.
1768 (define_insn "*fma_fmsubadd_<mode>"
1769 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
1771 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
1772 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
1774 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
1778 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1779 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1780 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1781 [(set_attr "type" "ssemuladd")
1782 (set_attr "mode" "<MODE>")])
1784 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1786 ;; Parallel single-precision floating point conversion operations
1788 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 ("ym") to V2SF.  Operand 1 is
;; tied to the destination register.
1790 (define_insn "sse_cvtpi2ps"
1791 [(set (match_operand:V4SF 0 "register_operand" "=x")
1794 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1795 (match_operand:V4SF 1 "register_operand" "0")
1798 "cvtpi2ps\t{%2, %0|%0, %2}"
1799 [(set_attr "type" "ssecvt")
1800 (set_attr "mode" "V4SF")])
;; cvtps2pi: an unspec conversion of the V4SF source to V4SI, of which
;; elements 0 and 1 form the V2SI result in a "y" register.
1802 (define_insn "sse_cvtps2pi"
1803 [(set (match_operand:V2SI 0 "register_operand" "=y")
1805 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1807 (parallel [(const_int 0) (const_int 1)])))]
1809 "cvtps2pi\t{%1, %0|%0, %1}"
1810 [(set_attr "type" "ssecvt")
1811 (set_attr "unit" "mmx")
1812 (set_attr "mode" "DI")])
;; cvttps2pi: `fix' (truncating) conversion of the V4SF source to V4SI,
;; of which elements 0 and 1 form the V2SI result in a "y" register.
1814 (define_insn "sse_cvttps2pi"
1815 [(set (match_operand:V2SI 0 "register_operand" "=y")
1817 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1818 (parallel [(const_int 0) (const_int 1)])))]
1820 "cvttps2pi\t{%1, %0|%0, %1}"
1821 [(set_attr "type" "ssecvt")
1822 (set_attr "unit" "mmx")
1823 (set_attr "prefix_rep" "0")
1824 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SImode operand 2 to SF.  Alternatives 0 and 1
;; are the two-operand SSE form with a register or memory source (kept
;; separate so the decode attributes can differ); alternative 2 is the
;; three-operand VEX form.
1826 (define_insn "sse_cvtsi2ss"
1827 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
1830 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
1831 (match_operand:V4SF 1 "register_operand" "0,0,x")
1835 cvtsi2ss\t{%2, %0|%0, %2}
1836 cvtsi2ss\t{%2, %0|%0, %2}
1837 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
1838 [(set_attr "isa" "noavx,noavx,avx")
1839 (set_attr "type" "sseicvt")
1840 (set_attr "athlon_decode" "vector,double,*")
1841 (set_attr "amdfam10_decode" "vector,double,*")
1842 (set_attr "bdver1_decode" "double,direct,*")
1843 (set_attr "prefix" "orig,orig,vex")
1844 (set_attr "mode" "SF")])
;; cvtsi2ssq: 64-bit-only variant of sse_cvtsi2ss taking a DImode source.
;; The SSE alternatives set prefix_rex to 1; the VEX alternative gives an
;; explicit length_vex of 4.
1846 (define_insn "sse_cvtsi2ssq"
1847 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
1850 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
1851 (match_operand:V4SF 1 "register_operand" "0,0,x")
1853 "TARGET_SSE && TARGET_64BIT"
1855 cvtsi2ssq\t{%2, %0|%0, %2}
1856 cvtsi2ssq\t{%2, %0|%0, %2}
1857 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
1858 [(set_attr "isa" "noavx,noavx,avx")
1859 (set_attr "type" "sseicvt")
1860 (set_attr "athlon_decode" "vector,double,*")
1861 (set_attr "amdfam10_decode" "vector,double,*")
1862 (set_attr "bdver1_decode" "double,direct,*")
1863 (set_attr "length_vex" "*,*,4")
1864 (set_attr "prefix_rex" "1,1,*")
1865 (set_attr "prefix" "orig,orig,vex")
1866 (set_attr "mode" "SF")])
;; cvtss2si: UNSPEC_FIX_NOTRUNC conversion of element 0 of the V4SF
;; operand to an SImode general register.
1868 (define_insn "sse_cvtss2si"
1869 [(set (match_operand:SI 0 "register_operand" "=r,r")
1872 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1873 (parallel [(const_int 0)]))]
1874 UNSPEC_FIX_NOTRUNC))]
1876 "%vcvtss2si\t{%1, %0|%0, %1}"
1877 [(set_attr "type" "sseicvt")
1878 (set_attr "athlon_decode" "double,vector")
1879 (set_attr "bdver1_decode" "double,double")
1880 (set_attr "prefix_rep" "1")
1881 (set_attr "prefix" "maybe_vex")
1882 (set_attr "mode" "SI")])
;; cvtss2si on a scalar SFmode operand (register or memory) rather than on
;; element 0 of a vector; same UNSPEC_FIX_NOTRUNC conversion to SImode.
1884 (define_insn "sse_cvtss2si_2"
1885 [(set (match_operand:SI 0 "register_operand" "=r,r")
1886 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1887 UNSPEC_FIX_NOTRUNC))]
1889 "%vcvtss2si\t{%1, %0|%0, %1}"
1890 [(set_attr "type" "sseicvt")
1891 (set_attr "athlon_decode" "double,vector")
1892 (set_attr "amdfam10_decode" "double,double")
1893 (set_attr "bdver1_decode" "double,double")
1894 (set_attr "prefix_rep" "1")
1895 (set_attr "prefix" "maybe_vex")
1896 (set_attr "mode" "SI")])
;; 64-bit-only cvtss2si producing a DImode result from element 0 of the
;; V4SF operand (UNSPEC_FIX_NOTRUNC).
1898 (define_insn "sse_cvtss2siq"
1899 [(set (match_operand:DI 0 "register_operand" "=r,r")
1902 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1903 (parallel [(const_int 0)]))]
1904 UNSPEC_FIX_NOTRUNC))]
1905 "TARGET_SSE && TARGET_64BIT"
1906 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
1907 [(set_attr "type" "sseicvt")
1908 (set_attr "athlon_decode" "double,vector")
1909 (set_attr "bdver1_decode" "double,double")
1910 (set_attr "prefix_rep" "1")
1911 (set_attr "prefix" "maybe_vex")
1912 (set_attr "mode" "DI")])
;; 64-bit-only cvtss2si producing a DImode result from a scalar SFmode
;; operand (UNSPEC_FIX_NOTRUNC).
1914 (define_insn "sse_cvtss2siq_2"
1915 [(set (match_operand:DI 0 "register_operand" "=r,r")
1916 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1917 UNSPEC_FIX_NOTRUNC))]
1918 "TARGET_SSE && TARGET_64BIT"
1919 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
1920 [(set_attr "type" "sseicvt")
1921 (set_attr "athlon_decode" "double,vector")
1922 (set_attr "amdfam10_decode" "double,double")
1923 (set_attr "bdver1_decode" "double,double")
1924 (set_attr "prefix_rep" "1")
1925 (set_attr "prefix" "maybe_vex")
1926 (set_attr "mode" "DI")])
;; cvttss2si: converts element 0 of the V4SF operand to an SImode general
;; register using the cvttss2si instruction.
1928 (define_insn "sse_cvttss2si"
1929 [(set (match_operand:SI 0 "register_operand" "=r,r")
1932 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1933 (parallel [(const_int 0)]))))]
1935 "%vcvttss2si\t{%1, %0|%0, %1}"
1936 [(set_attr "type" "sseicvt")
1937 (set_attr "athlon_decode" "double,vector")
1938 (set_attr "amdfam10_decode" "double,double")
1939 (set_attr "bdver1_decode" "double,double")
1940 (set_attr "prefix_rep" "1")
1941 (set_attr "prefix" "maybe_vex")
1942 (set_attr "mode" "SI")])
;; 64-bit-only cvttss2si producing a DImode result from element 0 of the
;; V4SF operand.
1944 (define_insn "sse_cvttss2siq"
1945 [(set (match_operand:DI 0 "register_operand" "=r,r")
1948 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1949 (parallel [(const_int 0)]))))]
1950 "TARGET_SSE && TARGET_64BIT"
1951 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
1952 [(set_attr "type" "sseicvt")
1953 (set_attr "athlon_decode" "double,vector")
1954 (set_attr "amdfam10_decode" "double,double")
1955 (set_attr "bdver1_decode" "double,double")
1956 (set_attr "prefix_rep" "1")
1957 (set_attr "prefix" "maybe_vex")
1958 (set_attr "mode" "DI")])
;; 256-bit vcvtdq2ps: integer-to-float conversion from V8SI to V8SF.
1960 (define_insn "avx_cvtdq2ps256"
1961 [(set (match_operand:V8SF 0 "register_operand" "=x")
1962 (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
1964 "vcvtdq2ps\t{%1, %0|%0, %1}"
1965 [(set_attr "type" "ssecvt")
1966 (set_attr "prefix" "vex")
1967 (set_attr "mode" "V8SF")])
;; 128-bit cvtdq2ps: integer-to-float conversion from V4SI to V4SF, with a
;; "maybe_vex" prefix.
1969 (define_insn "sse2_cvtdq2ps"
1970 [(set (match_operand:V4SF 0 "register_operand" "=x")
1971 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1973 "%vcvtdq2ps\t{%1, %0|%0, %1}"
1974 [(set_attr "type" "ssecvt")
1975 (set_attr "prefix" "maybe_vex")
1976 (set_attr "mode" "V4SF")])
;; Expander for a V4SI -> V4SF conversion built from a `float' conversion
;; plus a correction term.  Operand 3 is a zero vector and operand 4 a
;; vector whose elements are 2**32 (real_ldexp of dconst1 by 32); operands
;; 5..7 are fresh temporaries.  The correction is an lt comparison of
;; operand 5 against zero, and-ed with the 2**32 vector, and the result is
;; operand 5 plus operand 7.
;; NOTE(review): the converted value and the masked correction presumably
;; land in operands 5 and 7 -- confirm against the full pattern.
1978 (define_expand "sse2_cvtudq2ps"
1980 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
1982 (lt:V4SF (match_dup 5) (match_dup 3)))
1984 (and:V4SF (match_dup 6) (match_dup 4)))
1985 (set (match_operand:V4SF 0 "register_operand" "")
1986 (plus:V4SF (match_dup 5) (match_dup 7)))]
1989 REAL_VALUE_TYPE TWO32r;
1993 real_ldexp (&TWO32r, &dconst1, 32);
1994 x = const_double_from_real_value (TWO32r, SFmode);
1996 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
1997 operands[4] = force_reg (V4SFmode,
1998 ix86_build_const_vector (V4SFmode, 1, x));
2000 for (i = 5; i < 8; i++)
2001 operands[i] = gen_reg_rtx (V4SFmode);
;; 256-bit vcvtps2dq: UNSPEC_FIX_NOTRUNC conversion from V8SF to V8SI.
2004 (define_insn "avx_cvtps2dq256"
2005 [(set (match_operand:V8SI 0 "register_operand" "=x")
2006 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2007 UNSPEC_FIX_NOTRUNC))]
2009 "vcvtps2dq\t{%1, %0|%0, %1}"
2010 [(set_attr "type" "ssecvt")
2011 (set_attr "prefix" "vex")
2012 (set_attr "mode" "OI")])
;; 128-bit cvtps2dq: UNSPEC_FIX_NOTRUNC conversion from V4SF to V4SI.
;; The prefix_data16 attribute is selected by a TARGET_AVX test.
2014 (define_insn "sse2_cvtps2dq"
2015 [(set (match_operand:V4SI 0 "register_operand" "=x")
2016 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2017 UNSPEC_FIX_NOTRUNC))]
2019 "%vcvtps2dq\t{%1, %0|%0, %1}"
2020 [(set_attr "type" "ssecvt")
2021 (set (attr "prefix_data16")
2023 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2025 (const_string "1")))
2026 (set_attr "prefix" "maybe_vex")
2027 (set_attr "mode" "TI")])
;; 256-bit vcvttps2dq: `fix' (truncating) conversion from V8SF to V8SI.
2029 (define_insn "avx_cvttps2dq256"
2030 [(set (match_operand:V8SI 0 "register_operand" "=x")
2031 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2033 "vcvttps2dq\t{%1, %0|%0, %1}"
2034 [(set_attr "type" "ssecvt")
2035 (set_attr "prefix" "vex")
2036 (set_attr "mode" "OI")])
;; 128-bit cvttps2dq: `fix' (truncating) conversion from V4SF to V4SI.
;; prefix_rep and prefix_data16 are both selected by TARGET_AVX tests.
;; NOTE(review): prefix_data16 is set twice below, once conditionally and
;; once unconditionally to "0" -- confirm which setting is intended to
;; take effect.
2038 (define_insn "sse2_cvttps2dq"
2039 [(set (match_operand:V4SI 0 "register_operand" "=x")
2040 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2042 "%vcvttps2dq\t{%1, %0|%0, %1}"
2043 [(set_attr "type" "ssecvt")
2044 (set (attr "prefix_rep")
2046 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2048 (const_string "1")))
2049 (set (attr "prefix_data16")
2051 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2053 (const_string "0")))
2054 (set_attr "prefix_data16" "0")
2055 (set_attr "prefix" "maybe_vex")
2056 (set_attr "mode" "TI")])
2058 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2060 ;; Parallel double-precision floating point conversion operations
2062 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: integer-to-double conversion from V2SI to V2DF.  The source
;; is a "y" register (alternative 0) or memory (alternative 1); only the
;; register alternative sets the "unit" attribute to mmx.
2064 (define_insn "sse2_cvtpi2pd"
2065 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2066 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2068 "cvtpi2pd\t{%1, %0|%0, %1}"
2069 [(set_attr "type" "ssecvt")
2070 (set_attr "unit" "mmx,*")
2071 (set_attr "prefix_data16" "1,*")
2072 (set_attr "mode" "V2DF")])
;; cvtpd2pi: UNSPEC_FIX_NOTRUNC conversion from V2DF to a V2SI result in a
;; "y" register.
2074 (define_insn "sse2_cvtpd2pi"
2075 [(set (match_operand:V2SI 0 "register_operand" "=y")
2076 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2077 UNSPEC_FIX_NOTRUNC))]
2079 "cvtpd2pi\t{%1, %0|%0, %1}"
2080 [(set_attr "type" "ssecvt")
2081 (set_attr "unit" "mmx")
2082 (set_attr "bdver1_decode" "double")
2083 (set_attr "prefix_data16" "1")
2084 (set_attr "mode" "DI")])
;; cvttpd2pi: `fix' (truncating) conversion from V2DF to a V2SI result in
;; a "y" register.
2086 (define_insn "sse2_cvttpd2pi"
2087 [(set (match_operand:V2SI 0 "register_operand" "=y")
2088 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2090 "cvttpd2pi\t{%1, %0|%0, %1}"
2091 [(set_attr "type" "ssecvt")
2092 (set_attr "unit" "mmx")
2093 (set_attr "bdver1_decode" "double")
2094 (set_attr "prefix_data16" "1")
2095 (set_attr "mode" "TI")])
;; cvtsi2sd: converts the SImode operand 2 to DF.  Alternatives 0 and 1
;; are the two-operand SSE form with a register or memory source;
;; alternative 2 is the three-operand VEX form.
2097 (define_insn "sse2_cvtsi2sd"
2098 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2101 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2102 (match_operand:V2DF 1 "register_operand" "0,0,x")
2106 cvtsi2sd\t{%2, %0|%0, %2}
2107 cvtsi2sd\t{%2, %0|%0, %2}
2108 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2109 [(set_attr "isa" "noavx,noavx,avx")
2110 (set_attr "type" "sseicvt")
2111 (set_attr "athlon_decode" "double,direct,*")
2112 (set_attr "amdfam10_decode" "vector,double,*")
2113 (set_attr "bdver1_decode" "double,direct,*")
2114 (set_attr "prefix" "orig,orig,vex")
2115 (set_attr "mode" "DF")])
;; cvtsi2sdq: 64-bit-only variant of sse2_cvtsi2sd taking a DImode source.
;; The SSE alternatives set prefix_rex to 1; the VEX alternative gives an
;; explicit length_vex of 4.
2117 (define_insn "sse2_cvtsi2sdq"
2118 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2121 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2122 (match_operand:V2DF 1 "register_operand" "0,0,x")
2124 "TARGET_SSE2 && TARGET_64BIT"
2126 cvtsi2sdq\t{%2, %0|%0, %2}
2127 cvtsi2sdq\t{%2, %0|%0, %2}
2128 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2129 [(set_attr "isa" "noavx,noavx,avx")
2130 (set_attr "type" "sseicvt")
2131 (set_attr "athlon_decode" "double,direct,*")
2132 (set_attr "amdfam10_decode" "vector,double,*")
2133 (set_attr "bdver1_decode" "double,direct,*")
2134 (set_attr "length_vex" "*,*,4")
2135 (set_attr "prefix_rex" "1,1,*")
2136 (set_attr "prefix" "orig,orig,vex")
2137 (set_attr "mode" "DF")])
;; Convert element 0 of a V2DF operand (register or memory) to an SImode
;; general register with cvtsd2si, expressed as UNSPEC_FIX_NOTRUNC.
;; NOTE(review): unlike sse2_cvtsd2si_2 this pattern sets no
;; amdfam10_decode attribute -- confirm that is intentional.
2139 (define_insn "sse2_cvtsd2si"
2140 [(set (match_operand:SI 0 "register_operand" "=r,r")
2143 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2144 (parallel [(const_int 0)]))]
2145 UNSPEC_FIX_NOTRUNC))]
2147 "%vcvtsd2si\t{%1, %0|%0, %1}"
2148 [(set_attr "type" "sseicvt")
2149 (set_attr "athlon_decode" "double,vector")
2150 (set_attr "bdver1_decode" "double,double")
2151 (set_attr "prefix_rep" "1")
2152 (set_attr "prefix" "maybe_vex")
2153 (set_attr "mode" "SI")])
;; Same cvtsd2si conversion as sse2_cvtsd2si, but the source is a scalar
;; DF operand instead of element 0 of a V2DF vector.
2155 (define_insn "sse2_cvtsd2si_2"
2156 [(set (match_operand:SI 0 "register_operand" "=r,r")
2157 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2158 UNSPEC_FIX_NOTRUNC))]
2160 "%vcvtsd2si\t{%1, %0|%0, %1}"
2161 [(set_attr "type" "sseicvt")
2162 (set_attr "athlon_decode" "double,vector")
2163 (set_attr "amdfam10_decode" "double,double")
2164 (set_attr "bdver1_decode" "double,double")
2165 (set_attr "prefix_rep" "1")
2166 (set_attr "prefix" "maybe_vex")
2167 (set_attr "mode" "SI")])
;; DImode-result variant of sse2_cvtsd2si: converts element 0 of a V2DF
;; operand with cvtsd2si{q}.  Enabled only for TARGET_SSE2 && TARGET_64BIT.
2169 (define_insn "sse2_cvtsd2siq"
2170 [(set (match_operand:DI 0 "register_operand" "=r,r")
2173 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2174 (parallel [(const_int 0)]))]
2175 UNSPEC_FIX_NOTRUNC))]
2176 "TARGET_SSE2 && TARGET_64BIT"
2177 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2178 [(set_attr "type" "sseicvt")
2179 (set_attr "athlon_decode" "double,vector")
2180 (set_attr "bdver1_decode" "double,double")
2181 (set_attr "prefix_rep" "1")
2182 (set_attr "prefix" "maybe_vex")
2183 (set_attr "mode" "DI")])
;; DImode-result variant of sse2_cvtsd2si_2: scalar DF source, DI
;; destination, cvtsd2si{q}.  Enabled only for TARGET_SSE2 && TARGET_64BIT.
2185 (define_insn "sse2_cvtsd2siq_2"
2186 [(set (match_operand:DI 0 "register_operand" "=r,r")
2187 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2188 UNSPEC_FIX_NOTRUNC))]
2189 "TARGET_SSE2 && TARGET_64BIT"
2190 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2191 [(set_attr "type" "sseicvt")
2192 (set_attr "athlon_decode" "double,vector")
2193 (set_attr "amdfam10_decode" "double,double")
2194 (set_attr "bdver1_decode" "double,double")
2195 (set_attr "prefix_rep" "1")
2196 (set_attr "prefix" "maybe_vex")
2197 (set_attr "mode" "DI")])
;; Convert element 0 of a V2DF operand (register or memory) to an SImode
;; general register using the cvttsd2si instruction.
2199 (define_insn "sse2_cvttsd2si"
2200 [(set (match_operand:SI 0 "register_operand" "=r,r")
2203 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2204 (parallel [(const_int 0)]))))]
2206 "%vcvttsd2si\t{%1, %0|%0, %1}"
2207 [(set_attr "type" "sseicvt")
2208 (set_attr "athlon_decode" "double,vector")
2209 (set_attr "amdfam10_decode" "double,double")
2210 (set_attr "bdver1_decode" "double,double")
2211 (set_attr "prefix_rep" "1")
2212 (set_attr "prefix" "maybe_vex")
2213 (set_attr "mode" "SI")])
;; DImode-result variant of sse2_cvttsd2si (cvttsd2si{q}).  Enabled only
;; for TARGET_SSE2 && TARGET_64BIT.
2215 (define_insn "sse2_cvttsd2siq"
2216 [(set (match_operand:DI 0 "register_operand" "=r,r")
2219 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2220 (parallel [(const_int 0)]))))]
2221 "TARGET_SSE2 && TARGET_64BIT"
2222 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2223 [(set_attr "type" "sseicvt")
2224 (set_attr "athlon_decode" "double,vector")
2225 (set_attr "amdfam10_decode" "double,double")
2226 (set_attr "bdver1_decode" "double,double")
2227 (set_attr "prefix_rep" "1")
2228 (set_attr "prefix" "maybe_vex")
2229 (set_attr "mode" "DI")])
;; Convert four signed 32-bit integers (V4SI, register or memory) to a
;; V4DF register with the VEX-encoded vcvtdq2pd.
2231 (define_insn "avx_cvtdq2pd256"
2232 [(set (match_operand:V4DF 0 "register_operand" "=x")
2233 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2235 "vcvtdq2pd\t{%1, %0|%0, %1}"
2236 [(set_attr "type" "ssecvt")
2237 (set_attr "prefix" "vex")
2238 (set_attr "mode" "V4DF")])
;; Variant of avx_cvtdq2pd256 whose source is elements 0..3 of a V8SI
;; operand.  The template prints operand 1 with the %x modifier
;; (presumably its 128-bit xmm name -- confirm against the operand
;; printer).
2240 (define_insn "*avx_cvtdq2pd256_2"
2241 [(set (match_operand:V4DF 0 "register_operand" "=x")
2244 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2245 (parallel [(const_int 0) (const_int 1)
2246 (const_int 2) (const_int 3)]))))]
2248 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2249 [(set_attr "type" "ssecvt")
2250 (set_attr "prefix" "vex")
2251 (set_attr "mode" "V4DF")])
;; Convert elements 0 and 1 of a V4SI operand (register or memory) to a
;; V2DF register with cvtdq2pd (VEX form when available, via %v).
2253 (define_insn "sse2_cvtdq2pd"
2254 [(set (match_operand:V2DF 0 "register_operand" "=x")
2257 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2258 (parallel [(const_int 0) (const_int 1)]))))]
2260 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2261 [(set_attr "type" "ssecvt")
2262 (set_attr "prefix" "maybe_vex")
2263 (set_attr "mode" "V2DF")])
;; Convert a V4DF operand (register or memory) to V4SI with
;; vcvtpd2dq{y}; the conversion is wrapped in UNSPEC_FIX_NOTRUNC.
2265 (define_insn "avx_cvtpd2dq256"
2266 [(set (match_operand:V4SI 0 "register_operand" "=x")
2267 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2268 UNSPEC_FIX_NOTRUNC))]
2270 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2271 [(set_attr "type" "ssecvt")
2272 (set_attr "prefix" "vex")
2273 (set_attr "mode" "OI")])
;; Expander for the V2DF -> V2SI UNSPEC conversion into a V4SI
;; destination.  The preparation statement sets operands[2] to the
;; V2SImode zero constant (presumably the half of the result that
;; *sse2_cvtpd2dq matches with const0_operand -- confirm).
2275 (define_expand "sse2_cvtpd2dq"
2276 [(set (match_operand:V4SI 0 "register_operand" "")
2278 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2282 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn for the V2DF -> V2SI UNSPEC conversion with a V4SI destination.
;; Operand 2 must satisfy const0_operand.  The output code returns
;; either the vcvtpd2dq{x} or the cvtpd2dq template.
2284 (define_insn "*sse2_cvtpd2dq"
2285 [(set (match_operand:V4SI 0 "register_operand" "=x")
2287 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2289 (match_operand:V2SI 2 "const0_operand" "")))]
2293 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2295 return "cvtpd2dq\t{%1, %0|%0, %1}";
2297 [(set_attr "type" "ssecvt")
2298 (set_attr "prefix_rep" "1")
2299 (set_attr "prefix_data16" "0")
2300 (set_attr "prefix" "maybe_vex")
2301 (set_attr "mode" "TI")
2302 (set_attr "amdfam10_decode" "double")
2303 (set_attr "athlon_decode" "vector")
2304 (set_attr "bdver1_decode" "double")])
;; Convert a V4DF operand (register or memory) to V4SI with
;; vcvttpd2dq{y}; modeled as (fix:V4SI ...).
2306 (define_insn "avx_cvttpd2dq256"
2307 [(set (match_operand:V4SI 0 "register_operand" "=x")
2308 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2310 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2311 [(set_attr "type" "ssecvt")
2312 (set_attr "prefix" "vex")
2313 (set_attr "mode" "OI")])
;; Expander for (fix:V2SI V2DF) into a V4SI destination.  The
;; preparation statement sets operands[2] to the V2SImode zero constant
;; (presumably the half that *sse2_cvttpd2dq matches with const0_operand
;; -- confirm).
2315 (define_expand "sse2_cvttpd2dq"
2316 [(set (match_operand:V4SI 0 "register_operand" "")
2318 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2321 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn for (fix:V2SI V2DF) with a V4SI destination.  Operand 2 must
;; satisfy const0_operand.  The output code returns either the
;; vcvttpd2dq{x} or the cvttpd2dq template.
2323 (define_insn "*sse2_cvttpd2dq"
2324 [(set (match_operand:V4SI 0 "register_operand" "=x")
2326 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2327 (match_operand:V2SI 2 "const0_operand" "")))]
2331 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2333 return "cvttpd2dq\t{%1, %0|%0, %1}";
2335 [(set_attr "type" "ssecvt")
2336 (set_attr "amdfam10_decode" "double")
2337 (set_attr "athlon_decode" "vector")
2338 (set_attr "bdver1_decode" "double")
2339 (set_attr "prefix" "maybe_vex")
2340 (set_attr "mode" "TI")])
;; cvtsd2ss: the V2DF operand 2 is narrowed with float_truncate:V2SF and
;; combined with the V4SF register operand 1.  Operand 1 is tied to the
;; destination in the non-AVX alternatives and is an independent source
;; in the AVX alternative (vcvtsd2ss).
2342 (define_insn "sse2_cvtsd2ss"
2343 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2346 (float_truncate:V2SF
2347 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2348 (match_operand:V4SF 1 "register_operand" "0,0,x")
2352 cvtsd2ss\t{%2, %0|%0, %2}
2353 cvtsd2ss\t{%2, %0|%0, %2}
2354 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2355 [(set_attr "isa" "noavx,noavx,avx")
2356 (set_attr "type" "ssecvt")
2357 (set_attr "athlon_decode" "vector,double,*")
2358 (set_attr "amdfam10_decode" "vector,double,*")
2359 (set_attr "bdver1_decode" "direct,direct,*")
2360 (set_attr "prefix" "orig,orig,vex")
2361 (set_attr "mode" "SF")])
;; cvtss2sd: elements 0 and 1 of the V4SF operand 2 are the conversion
;; source and the V2DF register operand 1 is the other input.  Operand 1
;; is tied to the destination in the non-AVX alternatives and is an
;; independent source in the AVX alternative (vcvtss2sd).
2363 (define_insn "sse2_cvtss2sd"
2364 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2368 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2369 (parallel [(const_int 0) (const_int 1)])))
2370 (match_operand:V2DF 1 "register_operand" "0,0,x")
2374 cvtss2sd\t{%2, %0|%0, %2}
2375 cvtss2sd\t{%2, %0|%0, %2}
2376 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2377 [(set_attr "isa" "noavx,noavx,avx")
2378 (set_attr "type" "ssecvt")
2379 (set_attr "amdfam10_decode" "vector,double,*")
2380 (set_attr "athlon_decode" "direct,direct,*")
2381 (set_attr "bdver1_decode" "direct,direct,*")
2382 (set_attr "prefix" "orig,orig,vex")
2383 (set_attr "mode" "DF")])
;; Narrow a V4DF operand (register or memory) to V4SF with
;; vcvtpd2ps{y}; modeled as float_truncate:V4SF.
2385 (define_insn "avx_cvtpd2ps256"
2386 [(set (match_operand:V4SF 0 "register_operand" "=x")
2387 (float_truncate:V4SF
2388 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2390 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2391 [(set_attr "type" "ssecvt")
2392 (set_attr "prefix" "vex")
2393 (set_attr "mode" "V4SF")])
;; Expander for (float_truncate:V2SF V2DF) into a V4SF destination.  The
;; preparation statement sets operands[2] to the V2SFmode zero constant
;; (presumably the half that *sse2_cvtpd2ps matches with const0_operand
;; -- confirm).
2395 (define_expand "sse2_cvtpd2ps"
2396 [(set (match_operand:V4SF 0 "register_operand" "")
2398 (float_truncate:V2SF
2399 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2402 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn for (float_truncate:V2SF V2DF) with a V4SF destination.
;; Operand 2 must satisfy const0_operand.  The output code returns
;; either the vcvtpd2ps{x} or the cvtpd2ps template.
2404 (define_insn "*sse2_cvtpd2ps"
2405 [(set (match_operand:V4SF 0 "register_operand" "=x")
2407 (float_truncate:V2SF
2408 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2409 (match_operand:V2SF 2 "const0_operand" "")))]
2413 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2415 return "cvtpd2ps\t{%1, %0|%0, %1}";
2417 [(set_attr "type" "ssecvt")
2418 (set_attr "amdfam10_decode" "double")
2419 (set_attr "athlon_decode" "vector")
2420 (set_attr "bdver1_decode" "double")
2421 (set_attr "prefix_data16" "1")
2422 (set_attr "prefix" "maybe_vex")
2423 (set_attr "mode" "V4SF")])
;; Widen a V4SF operand (register or memory) to a V4DF register with the
;; VEX-encoded vcvtps2pd.
2425 (define_insn "avx_cvtps2pd256"
2426 [(set (match_operand:V4DF 0 "register_operand" "=x")
2428 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2430 "vcvtps2pd\t{%1, %0|%0, %1}"
2431 [(set_attr "type" "ssecvt")
2432 (set_attr "prefix" "vex")
2433 (set_attr "mode" "V4DF")])
;; Variant of avx_cvtps2pd256 whose source is elements 0..3 of a V8SF
;; operand.  The template prints operand 1 with the %x modifier
;; (presumably its 128-bit xmm name -- confirm against the operand
;; printer).
2435 (define_insn "*avx_cvtps2pd256_2"
2436 [(set (match_operand:V4DF 0 "register_operand" "=x")
2439 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2440 (parallel [(const_int 0) (const_int 1)
2441 (const_int 2) (const_int 3)]))))]
2443 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2444 [(set_attr "type" "ssecvt")
2445 (set_attr "prefix" "vex")
2446 (set_attr "mode" "V4DF")])
;; Widen elements 0 and 1 of a V4SF operand (register or memory) to a
;; V2DF register with cvtps2pd (VEX form when available, via %v).
2448 (define_insn "sse2_cvtps2pd"
2449 [(set (match_operand:V2DF 0 "register_operand" "=x")
2452 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2453 (parallel [(const_int 0) (const_int 1)]))))]
2455 "%vcvtps2pd\t{%1, %0|%0, %1}"
2456 [(set_attr "type" "ssecvt")
2457 (set_attr "amdfam10_decode" "direct")
2458 (set_attr "athlon_decode" "double")
2459 (set_attr "bdver1_decode" "double")
2460 (set_attr "prefix_data16" "0")
2461 (set_attr "prefix" "maybe_vex")
2462 (set_attr "mode" "V2DF")])
;; Two-step expansion producing a V2DF result from the V4SF operand 1.
;; Step one is a permutation with selector {6, 7, 2, 3} that involves
;; operand 1; step two sets operand 0 from a selection of elements 0 and
;; 1.  operands[2] is a fresh V4SF pseudo allocated by the preparation
;; statement.
2464 (define_expand "vec_unpacks_hi_v4sf"
2469 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2470 (parallel [(const_int 6) (const_int 7)
2471 (const_int 2) (const_int 3)])))
2472 (set (match_operand:V2DF 0 "register_operand" "")
2476 (parallel [(const_int 0) (const_int 1)]))))]
2478 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expansion producing a V4DF result from the V8SF operand 1.
;; Step one selects elements 4..7 of operand 1; step two sets operand 0.
;; operands[2] is a fresh V4SF pseudo allocated by the preparation
;; statement.
2480 (define_expand "vec_unpacks_hi_v8sf"
2483 (match_operand:V8SF 1 "nonimmediate_operand" "")
2484 (parallel [(const_int 4) (const_int 5)
2485 (const_int 6) (const_int 7)])))
2486 (set (match_operand:V4DF 0 "register_operand" "")
2490 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Produce a V2DF result from elements 0 and 1 of the V4SF operand 1.
2492 (define_expand "vec_unpacks_lo_v4sf"
2493 [(set (match_operand:V2DF 0 "register_operand" "")
2496 (match_operand:V4SF 1 "nonimmediate_operand" "")
2497 (parallel [(const_int 0) (const_int 1)]))))]
;; Produce a V4DF result from elements 0..3 of the V8SF operand 1.
2500 (define_expand "vec_unpacks_lo_v8sf"
2501 [(set (match_operand:V4DF 0 "register_operand" "")
2504 (match_operand:V8SF 1 "nonimmediate_operand" "")
2505 (parallel [(const_int 0) (const_int 1)
2506 (const_int 2) (const_int 3)]))))]
;; V8HI -> V4SF: emit vec_unpacks_hi_v8hi into a V4SI temporary, then
;; convert that temporary into operand 0 with sse2_cvtdq2ps.
2509 (define_expand "vec_unpacks_float_hi_v8hi"
2510 [(match_operand:V4SF 0 "register_operand" "")
2511 (match_operand:V8HI 1 "register_operand" "")]
2514 rtx tmp = gen_reg_rtx (V4SImode);
2516 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2517 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: emit vec_unpacks_lo_v8hi into a V4SI temporary, then
;; convert that temporary into operand 0 with sse2_cvtdq2ps.
2521 (define_expand "vec_unpacks_float_lo_v8hi"
2522 [(match_operand:V4SF 0 "register_operand" "")
2523 (match_operand:V8HI 1 "register_operand" "")]
2526 rtx tmp = gen_reg_rtx (V4SImode);
2528 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2529 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: emit vec_unpacku_hi_v8hi into a V4SI temporary, then
;; convert that temporary into operand 0 with sse2_cvtdq2ps.
2533 (define_expand "vec_unpacku_float_hi_v8hi"
2534 [(match_operand:V4SF 0 "register_operand" "")
2535 (match_operand:V8HI 1 "register_operand" "")]
2538 rtx tmp = gen_reg_rtx (V4SImode);
2540 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2541 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: emit vec_unpacku_lo_v8hi into a V4SI temporary, then
;; convert that temporary into operand 0 with sse2_cvtdq2ps.
2545 (define_expand "vec_unpacku_float_lo_v8hi"
2546 [(match_operand:V4SF 0 "register_operand" "")
2547 (match_operand:V8HI 1 "register_operand" "")]
2550 rtx tmp = gen_reg_rtx (V4SImode);
2552 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2553 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expansion producing a V2DF result from the V4SI operand 1.
;; Step one permutes operand 1 with selector {2, 3, 2, 3}; step two sets
;; operand 0 from a selection of elements 0 and 1.  operands[2] is a
;; fresh V4SI pseudo allocated by the preparation statement.
2557 (define_expand "vec_unpacks_float_hi_v4si"
2560 (match_operand:V4SI 1 "nonimmediate_operand" "")
2561 (parallel [(const_int 2) (const_int 3)
2562 (const_int 2) (const_int 3)])))
2563 (set (match_operand:V2DF 0 "register_operand" "")
2567 (parallel [(const_int 0) (const_int 1)]))))]
2569 "operands[2] = gen_reg_rtx (V4SImode);")
;; Produce a V2DF result from elements 0 and 1 of the V4SI operand 1.
2571 (define_expand "vec_unpacks_float_lo_v4si"
2572 [(set (match_operand:V2DF 0 "register_operand" "")
2575 (match_operand:V4SI 1 "nonimmediate_operand" "")
2576 (parallel [(const_int 0) (const_int 1)]))))]
;; Two-step expansion producing a V4DF result from the V8SI operand 1.
;; Step one selects elements 4..7 of operand 1; step two sets operand 0.
;; operands[2] is a fresh V4SI pseudo allocated by the preparation
;; statement.
2579 (define_expand "vec_unpacks_float_hi_v8si"
2582 (match_operand:V8SI 1 "nonimmediate_operand" "")
2583 (parallel [(const_int 4) (const_int 5)
2584 (const_int 6) (const_int 7)])))
2585 (set (match_operand:V4DF 0 "register_operand" "")
2589 "operands[2] = gen_reg_rtx (V4SImode);")
;; Produce a V4DF result from elements 0..3 of the V8SI operand 1.
2591 (define_expand "vec_unpacks_float_lo_v8si"
2592 [(set (match_operand:V4DF 0 "register_operand" "")
2595 (match_operand:V8SI 1 "nonimmediate_operand" "")
2596 (parallel [(const_int 0) (const_int 1)]))))]
;; Multi-step expansion from the V4SI operand 1 to a V2DF result, with a
;; fix-up by 2**32.  The visible steps are: permute operand 1 with
;; selector {2, 3, 2, 3}; take elements 0 and 1; compute the mask
;; (operand 6 < operand 3); AND that mask with operand 4; add the masked
;; value to operand 6.
;; The preparation code sets operand 3 to a V2DF zero vector and
;; operand 4 to a V2DF constant vector built from 2**32 (real_ldexp of
;; 1.0 by 32); operand 5 is a V4SI temporary and operands 6..8 are V2DF
;; temporaries.
2600 (define_expand "vec_unpacku_float_hi_v4si"
2603 (match_operand:V4SI 1 "nonimmediate_operand" "")
2604 (parallel [(const_int 2) (const_int 3)
2605 (const_int 2) (const_int 3)])))
2610 (parallel [(const_int 0) (const_int 1)]))))
2612 (lt:V2DF (match_dup 6) (match_dup 3)))
2614 (and:V2DF (match_dup 7) (match_dup 4)))
2615 (set (match_operand:V2DF 0 "register_operand" "")
2616 (plus:V2DF (match_dup 6) (match_dup 8)))]
2619 REAL_VALUE_TYPE TWO32r;
2623 real_ldexp (&TWO32r, &dconst1, 32);
2624 x = const_double_from_real_value (TWO32r, DFmode);
2626 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2627 operands[4] = force_reg (V2DFmode,
2628 ix86_build_const_vector (V2DFmode, 1, x));
2630 operands[5] = gen_reg_rtx (V4SImode);
2632 for (i = 6; i < 9; i++)
2633 operands[i] = gen_reg_rtx (V2DFmode);
;; Low-half counterpart of vec_unpacku_float_hi_v4si.  The visible steps
;; are: take elements 0 and 1 of the V4SI operand 1; compute the mask
;; (operand 5 < operand 3); AND that mask with operand 4; add the masked
;; value to operand 5.
;; The preparation code sets operand 3 to a V2DF zero vector and
;; operand 4 to a V2DF constant vector built from 2**32; operands 5..7
;; are V2DF temporaries.
2636 (define_expand "vec_unpacku_float_lo_v4si"
2640 (match_operand:V4SI 1 "nonimmediate_operand" "")
2641 (parallel [(const_int 0) (const_int 1)]))))
2643 (lt:V2DF (match_dup 5) (match_dup 3)))
2645 (and:V2DF (match_dup 6) (match_dup 4)))
2646 (set (match_operand:V2DF 0 "register_operand" "")
2647 (plus:V2DF (match_dup 5) (match_dup 7)))]
2650 REAL_VALUE_TYPE TWO32r;
2654 real_ldexp (&TWO32r, &dconst1, 32);
2655 x = const_double_from_real_value (TWO32r, DFmode);
2657 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2658 operands[4] = force_reg (V2DFmode,
2659 ix86_build_const_vector (V2DFmode, 1, x));
2661 for (i = 5; i < 8; i++)
2662 operands[i] = gen_reg_rtx (V2DFmode);
;; Pack two V4DF operands (1 and 2) into one V8SF result: each input is
;; narrowed with float_truncate:V4SF, then operand 0 is set from the two
;; halves.  operands[3] and operands[4] are fresh V4SF pseudos allocated
;; by the preparation code.
2665 (define_expand "vec_pack_trunc_v4df"
2667 (float_truncate:V4SF
2668 (match_operand:V4DF 1 "nonimmediate_operand" "")))
2670 (float_truncate:V4SF
2671 (match_operand:V4DF 2 "nonimmediate_operand" "")))
2672 (set (match_operand:V8SF 0 "register_operand" "")
2678 operands[3] = gen_reg_rtx (V4SFmode);
2679 operands[4] = gen_reg_rtx (V4SFmode);
;; Pack two V2DF operands into one V4SF result: convert each input with
;; sse2_cvtpd2ps into its own V4SF temporary, then merge the two
;; temporaries into operand 0 with sse_movlhps.
2682 (define_expand "vec_pack_trunc_v2df"
2683 [(match_operand:V4SF 0 "register_operand" "")
2684 (match_operand:V2DF 1 "nonimmediate_operand" "")
2685 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2690 r1 = gen_reg_rtx (V4SFmode);
2691 r2 = gen_reg_rtx (V4SFmode);
2693 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2694 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2695 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Pack two V2DF operands into one V4SI result: convert each input with
;; sse2_cvttpd2dq into its own V4SI temporary, then combine the two
;; temporaries with vec_interleave_lowv2di, viewing the destination and
;; both temporaries as V2DI via gen_lowpart.
2699 (define_expand "vec_pack_sfix_trunc_v2df"
2700 [(match_operand:V4SI 0 "register_operand" "")
2701 (match_operand:V2DF 1 "nonimmediate_operand" "")
2702 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2707 r1 = gen_reg_rtx (V4SImode);
2708 r2 = gen_reg_rtx (V4SImode);
2710 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2711 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2712 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2713 gen_lowpart (V2DImode, r1),
2714 gen_lowpart (V2DImode, r2)));
;; Same shape as vec_pack_sfix_trunc_v2df, but each V2DF input is
;; converted with sse2_cvtpd2dq instead of sse2_cvttpd2dq before the two
;; V4SI temporaries are combined with vec_interleave_lowv2di.
2718 (define_expand "vec_pack_sfix_v2df"
2719 [(match_operand:V4SI 0 "register_operand" "")
2720 (match_operand:V2DF 1 "nonimmediate_operand" "")
2721 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2726 r1 = gen_reg_rtx (V4SImode);
2727 r2 = gen_reg_rtx (V4SImode);
2729 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2730 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2731 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2732 gen_lowpart (V2DImode, r1),
2733 gen_lowpart (V2DImode, r2)));
2737 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2739 ;; Parallel single-precision floating point element swizzling
2741 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around sse_movhlps that accepts any nonimmediate
;; operands.  ix86_fixup_binary_operands returns the destination to use
;; for the insn; when that differs from operand 0, the result is copied
;; to operand 0 afterwards.
2743 (define_expand "sse_movhlps_exp"
2744 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2747 (match_operand:V4SF 1 "nonimmediate_operand" "")
2748 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2749 (parallel [(const_int 6)
2755 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
2757 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
2759 /* Fix up the destination if needed. */
2760 if (dst != operands[0])
2761 emit_move_insn (operands[0], dst);
;; movhlps-style element selection on two V4SF operands.  Five
;; alternatives: movhlps / vmovhlps with register sources, movlps /
;; vmovlps reading %H2 when operand 2 is offsettable memory (%H
;; presumably addresses the high half -- confirm), and movhps when the
;; destination is memory.  The insn condition rejects the case where
;; operands 1 and 2 are both memory.
2766 (define_insn "sse_movhlps"
2767 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
2770 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
2771 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
2772 (parallel [(const_int 6)
2776 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2778 movhlps\t{%2, %0|%0, %2}
2779 vmovhlps\t{%2, %1, %0|%0, %1, %2}
2780 movlps\t{%H2, %0|%0, %H2}
2781 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
2782 %vmovhps\t{%2, %0|%0, %2}"
2783 [(set_attr "isa" "noavx,avx,noavx,avx,base")
2784 (set_attr "type" "ssemov")
2785 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
2786 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around sse_movlhps that accepts any nonimmediate
;; operands.  ix86_fixup_binary_operands returns the destination to use
;; for the insn; when that differs from operand 0, the result is copied
;; to operand 0 afterwards.
2788 (define_expand "sse_movlhps_exp"
2789 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2792 (match_operand:V4SF 1 "nonimmediate_operand" "")
2793 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2794 (parallel [(const_int 0)
2800 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
2802 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
2804 /* Fix up the destination if needed. */
2805 if (dst != operands[0])
2806 emit_move_insn (operands[0], dst);
;; movlhps-style element selection on two V4SF operands.  Five
;; alternatives: movlhps / vmovlhps, movhps / vmovhps, and a movlps store
;; to %H0 when the destination is offsettable memory.  The insn is only
;; valid when ix86_binary_operator_ok accepts the operands.
;; NOTE(review): alternative 3 (vmovhps) constrains operand 2 to "x"
;; while the non-AVX movhps alternative uses "m" -- confirm which is
;; intended.
2811 (define_insn "sse_movlhps"
2812 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
2815 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
2816 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,x,x"))
2817 (parallel [(const_int 0)
2821 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2823 movlhps\t{%2, %0|%0, %2}
2824 vmovlhps\t{%2, %1, %0|%0, %1, %2}
2825 movhps\t{%2, %0|%0, %2}
2826 vmovhps\t{%2, %1, %0|%0, %1, %2}
2827 %vmovlps\t{%2, %H0|%H0, %2}"
2828 [(set_attr "isa" "noavx,avx,noavx,avx,base")
2829 (set_attr "type" "ssemov")
2830 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
2831 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
2833 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps on V8SF: with operand 1 numbered 0..7 and operand 2
;; numbered 8..15, the result is elements {2,10,3,11, 6,14,7,15}.
2834 (define_insn "avx_unpckhps256"
2835 [(set (match_operand:V8SF 0 "register_operand" "=x")
2838 (match_operand:V8SF 1 "register_operand" "x")
2839 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2840 (parallel [(const_int 2) (const_int 10)
2841 (const_int 3) (const_int 11)
2842 (const_int 6) (const_int 14)
2843 (const_int 7) (const_int 15)])))]
2845 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
2846 [(set_attr "type" "sselog")
2847 (set_attr "prefix" "vex")
2848 (set_attr "mode" "V8SF")])
;; Three-step expansion on V8SF operands 1 and 2: a permutation with the
;; unpcklps selector {0,8,1,9, 4,12,5,13}, one with the unpckhps
;; selector {2,10,3,11, 6,14,7,15}, and a final selection of elements
;; {4..7, 12..15} into operand 0.  operands[3] and operands[4] are fresh
;; V8SF pseudos allocated by the preparation code.
2850 (define_expand "vec_interleave_highv8sf"
2854 (match_operand:V8SF 1 "register_operand" "x")
2855 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2856 (parallel [(const_int 0) (const_int 8)
2857 (const_int 1) (const_int 9)
2858 (const_int 4) (const_int 12)
2859 (const_int 5) (const_int 13)])))
2865 (parallel [(const_int 2) (const_int 10)
2866 (const_int 3) (const_int 11)
2867 (const_int 6) (const_int 14)
2868 (const_int 7) (const_int 15)])))
2869 (set (match_operand:V8SF 0 "register_operand" "")
2874 (parallel [(const_int 4) (const_int 5)
2875 (const_int 6) (const_int 7)
2876 (const_int 12) (const_int 13)
2877 (const_int 14) (const_int 15)])))]
2880 operands[3] = gen_reg_rtx (V8SFmode);
2881 operands[4] = gen_reg_rtx (V8SFmode);
;; unpckhps / vunpckhps on V4SF: with operand 1 numbered 0..3 and
;; operand 2 numbered 4..7, the result is elements {2, 6, 3, 7}.
;; Operand 1 is tied to the destination in the non-AVX alternative.
2884 (define_insn "vec_interleave_highv4sf"
2885 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2888 (match_operand:V4SF 1 "register_operand" "0,x")
2889 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2890 (parallel [(const_int 2) (const_int 6)
2891 (const_int 3) (const_int 7)])))]
2894 unpckhps\t{%2, %0|%0, %2}
2895 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
2896 [(set_attr "isa" "noavx,avx")
2897 (set_attr "type" "sselog")
2898 (set_attr "prefix" "orig,vex")
2899 (set_attr "mode" "V4SF")])
2901 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpcklps on V8SF: with operand 1 numbered 0..7 and operand 2
;; numbered 8..15, the result is elements {0,8,1,9, 4,12,5,13}.
2902 (define_insn "avx_unpcklps256"
2903 [(set (match_operand:V8SF 0 "register_operand" "=x")
2906 (match_operand:V8SF 1 "register_operand" "x")
2907 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2908 (parallel [(const_int 0) (const_int 8)
2909 (const_int 1) (const_int 9)
2910 (const_int 4) (const_int 12)
2911 (const_int 5) (const_int 13)])))]
2913 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
2914 [(set_attr "type" "sselog")
2915 (set_attr "prefix" "vex")
2916 (set_attr "mode" "V8SF")])
;; Three-step expansion on V8SF operands 1 and 2: a permutation with the
;; unpcklps selector {0,8,1,9, 4,12,5,13}, one with the unpckhps
;; selector {2,10,3,11, 6,14,7,15}, and a final selection of elements
;; {0..3, 8..11} into operand 0.  operands[3] and operands[4] are fresh
;; V8SF pseudos allocated by the preparation code.
2918 (define_expand "vec_interleave_lowv8sf"
2922 (match_operand:V8SF 1 "register_operand" "x")
2923 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2924 (parallel [(const_int 0) (const_int 8)
2925 (const_int 1) (const_int 9)
2926 (const_int 4) (const_int 12)
2927 (const_int 5) (const_int 13)])))
2933 (parallel [(const_int 2) (const_int 10)
2934 (const_int 3) (const_int 11)
2935 (const_int 6) (const_int 14)
2936 (const_int 7) (const_int 15)])))
2937 (set (match_operand:V8SF 0 "register_operand" "")
2942 (parallel [(const_int 0) (const_int 1)
2943 (const_int 2) (const_int 3)
2944 (const_int 8) (const_int 9)
2945 (const_int 10) (const_int 11)])))]
2948 operands[3] = gen_reg_rtx (V8SFmode);
2949 operands[4] = gen_reg_rtx (V8SFmode);
;; unpcklps / vunpcklps on V4SF: with operand 1 numbered 0..3 and
;; operand 2 numbered 4..7, the result is elements {0, 4, 1, 5}.
;; Operand 1 is tied to the destination in the non-AVX alternative.
2952 (define_insn "vec_interleave_lowv4sf"
2953 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2956 (match_operand:V4SF 1 "register_operand" "0,x")
2957 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2958 (parallel [(const_int 0) (const_int 4)
2959 (const_int 1) (const_int 5)])))]
2962 unpcklps\t{%2, %0|%0, %2}
2963 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
2964 [(set_attr "isa" "noavx,avx")
2965 (set_attr "type" "sselog")
2966 (set_attr "prefix" "orig,vex")
2967 (set_attr "mode" "V4SF")])
2969 ;; These are modeled with the same vec_concat as the others so that we
2970 ;; capture users of shufps that can use the new instructions
;; vmovshdup on V8SF: the selector is {1,1, 3,3, 5,5, 7,7}, i.e. every
;; odd-indexed element of operand 1 is duplicated.
2971 (define_insn "avx_movshdup256"
2972 [(set (match_operand:V8SF 0 "register_operand" "=x")
2975 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2977 (parallel [(const_int 1) (const_int 1)
2978 (const_int 3) (const_int 3)
2979 (const_int 5) (const_int 5)
2980 (const_int 7) (const_int 7)])))]
2982 "vmovshdup\t{%1, %0|%0, %1}"
2983 [(set_attr "type" "sse")
2984 (set_attr "prefix" "vex")
2985 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on a V4SF operand (register or memory); emitted in
;; VEX form when available via the %v prefix.
2987 (define_insn "sse3_movshdup"
2988 [(set (match_operand:V4SF 0 "register_operand" "=x")
2991 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2993 (parallel [(const_int 1)
2998 "%vmovshdup\t{%1, %0|%0, %1}"
2999 [(set_attr "type" "sse")
3000 (set_attr "prefix_rep" "1")
3001 (set_attr "prefix" "maybe_vex")
3002 (set_attr "mode" "V4SF")])
;; vmovsldup on V8SF: the selector is {0,0, 2,2, 4,4, 6,6}, i.e. every
;; even-indexed element of operand 1 is duplicated.
3004 (define_insn "avx_movsldup256"
3005 [(set (match_operand:V8SF 0 "register_operand" "=x")
3008 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3010 (parallel [(const_int 0) (const_int 0)
3011 (const_int 2) (const_int 2)
3012 (const_int 4) (const_int 4)
3013 (const_int 6) (const_int 6)])))]
3015 "vmovsldup\t{%1, %0|%0, %1}"
3016 [(set_attr "type" "sse")
3017 (set_attr "prefix" "vex")
3018 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on a V4SF operand (register or memory); emitted in
;; VEX form when available via the %v prefix.
3020 (define_insn "sse3_movsldup"
3021 [(set (match_operand:V4SF 0 "register_operand" "=x")
3024 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3026 (parallel [(const_int 0)
3031 "%vmovsldup\t{%1, %0|%0, %1}"
3032 [(set_attr "type" "sse")
3033 (set_attr "prefix_rep" "1")
3034 (set_attr "prefix" "maybe_vex")
3035 (set_attr "mode" "V4SF")])
;; Expander taking the shuffle immediate as operand 3.  It splits the
;; immediate into four 2-bit fields and passes eight explicit element
;; selectors to avx_shufps256_1: fields 0 and 1 unchanged (0..3), fields
;; 2 and 3 offset by 8, then the same four fields again offset by 4 and
;; by 12.
3037 (define_expand "avx_shufps256"
3038 [(match_operand:V8SF 0 "register_operand" "")
3039 (match_operand:V8SF 1 "register_operand" "")
3040 (match_operand:V8SF 2 "nonimmediate_operand" "")
3041 (match_operand:SI 3 "const_int_operand" "")]
3044 int mask = INTVAL (operands[3]);
3045 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3046 GEN_INT ((mask >> 0) & 3),
3047 GEN_INT ((mask >> 2) & 3),
3048 GEN_INT (((mask >> 4) & 3) + 8),
3049 GEN_INT (((mask >> 6) & 3) + 8),
3050 GEN_INT (((mask >> 0) & 3) + 4),
3051 GEN_INT (((mask >> 2) & 3) + 4),
3052 GEN_INT (((mask >> 4) & 3) + 12),
3053 GEN_INT (((mask >> 6) & 3) + 12)));
3057 ;; One bit in mask selects 2 elements.
;; vshufps on V8SF with the eight element selectors given as operands
;; 3..10.  The insn condition requires selectors 7..10 to equal
;; selectors 3..6 plus 4.  The output code rebuilds the 8-bit immediate
;; from operands 3..6 (subtracting the offset of 8 from operands 5 and 6)
;; and stores it back into operands[3] for printing.
3058 (define_insn "avx_shufps256_1"
3059 [(set (match_operand:V8SF 0 "register_operand" "=x")
3062 (match_operand:V8SF 1 "register_operand" "x")
3063 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3064 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3065 (match_operand 4 "const_0_to_3_operand" "")
3066 (match_operand 5 "const_8_to_11_operand" "")
3067 (match_operand 6 "const_8_to_11_operand" "")
3068 (match_operand 7 "const_4_to_7_operand" "")
3069 (match_operand 8 "const_4_to_7_operand" "")
3070 (match_operand 9 "const_12_to_15_operand" "")
3071 (match_operand 10 "const_12_to_15_operand" "")])))]
3073 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3074 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3075 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3076 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3079 mask = INTVAL (operands[3]);
3080 mask |= INTVAL (operands[4]) << 2;
3081 mask |= (INTVAL (operands[5]) - 8) << 4;
3082 mask |= (INTVAL (operands[6]) - 8) << 6;
3083 operands[3] = GEN_INT (mask);
3085 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3087 [(set_attr "type" "sselog")
3088 (set_attr "length_immediate" "1")
3089 (set_attr "prefix" "vex")
3090 (set_attr "mode" "V8SF")])
;; Expander taking the shuffle immediate as operand 3.  It splits the
;; immediate into four 2-bit fields and passes them to sse_shufps_v4sf
;; as explicit element selectors: fields 0 and 1 unchanged (0..3) and
;; fields 2 and 3 offset by 4.
3092 (define_expand "sse_shufps"
3093 [(match_operand:V4SF 0 "register_operand" "")
3094 (match_operand:V4SF 1 "register_operand" "")
3095 (match_operand:V4SF 2 "nonimmediate_operand" "")
3096 (match_operand:SI 3 "const_int_operand" "")]
3099 int mask = INTVAL (operands[3]);
3100 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3101 GEN_INT ((mask >> 0) & 3),
3102 GEN_INT ((mask >> 2) & 3),
3103 GEN_INT (((mask >> 4) & 3) + 4),
3104 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps / vshufps for every mode in the VI4F_128 iterator.  Operands 3
;; and 4 select from operand 1 (0..3); operands 5 and 6 select from
;; operand 2 (4..7 in the concatenated numbering).  The output code
;; rebuilds the 8-bit immediate from operands 3..6, stores it in
;; operands[3], and returns the template for the matched alternative.
3108 (define_insn "sse_shufps_<mode>"
3109 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3110 (vec_select:VI4F_128
3111 (vec_concat:<ssedoublevecmode>
3112 (match_operand:VI4F_128 1 "register_operand" "0,x")
3113 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3114 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3115 (match_operand 4 "const_0_to_3_operand" "")
3116 (match_operand 5 "const_4_to_7_operand" "")
3117 (match_operand 6 "const_4_to_7_operand" "")])))]
3121 mask |= INTVAL (operands[3]) << 0;
3122 mask |= INTVAL (operands[4]) << 2;
3123 mask |= (INTVAL (operands[5]) - 4) << 4;
3124 mask |= (INTVAL (operands[6]) - 4) << 6;
3125 operands[3] = GEN_INT (mask);
3127 switch (which_alternative)
3130 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3132 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3137 [(set_attr "isa" "noavx,avx")
3138 (set_attr "type" "sselog")
3139 (set_attr "length_immediate" "1")
3140 (set_attr "prefix" "orig,vex")
3141 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of a V4SF operand as a V2SF value.
;; Alternatives: movhps store to memory, movhlps between registers, and
;; movlps load from %H1 when the source is offsettable memory (%H
;; presumably addresses the high half -- confirm).
3143 (define_insn "sse_storehps"
3144 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3146 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3147 (parallel [(const_int 2) (const_int 3)])))]
3150 %vmovhps\t{%1, %0|%0, %1}
3151 %vmovhlps\t{%1, %d0|%d0, %1}
3152 %vmovlps\t{%H1, %d0|%d0, %H1}"
3153 [(set_attr "type" "ssemov")
3154 (set_attr "prefix" "maybe_vex")
3155 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadhps that accepts any nonimmediate
;; operands.  ix86_fixup_binary_operands returns the destination to use
;; for the insn; when that differs from operand 0, the result is copied
;; to operand 0 afterwards.
3157 (define_expand "sse_loadhps_exp"
3158 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3161 (match_operand:V4SF 1 "nonimmediate_operand" "")
3162 (parallel [(const_int 0) (const_int 1)]))
3163 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3166 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3168 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3170 /* Fix up the destination if needed. */
3171 if (dst != operands[0])
3172 emit_move_insn (operands[0], dst);
;; Combine elements 0 and 1 of the V4SF operand 1 with the V2SF operand
;; 2.  Alternatives: movhps / vmovhps when operand 2 is memory, movlhps /
;; vmovlhps when it is a register, and a movlps store to %H0 when the
;; destination is offsettable memory.
3177 (define_insn "sse_loadhps"
3178 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3181 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3182 (parallel [(const_int 0) (const_int 1)]))
3183 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3186 movhps\t{%2, %0|%0, %2}
3187 vmovhps\t{%2, %1, %0|%0, %1, %2}
3188 movlhps\t{%2, %0|%0, %2}
3189 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3190 %vmovlps\t{%2, %H0|%H0, %2}"
3191 [(set_attr "isa" "noavx,avx,noavx,avx,base")
3192 (set_attr "type" "ssemov")
3193 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3194 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of a V4SF operand as a V2SF value.
;; Alternatives: movlps store to memory, movaps between registers, and
;; movlps load from memory.
3196 (define_insn "sse_storelps"
3197 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3199 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3200 (parallel [(const_int 0) (const_int 1)])))]
3203 %vmovlps\t{%1, %0|%0, %1}
3204 %vmovaps\t{%1, %0|%0, %1}
3205 %vmovlps\t{%1, %d0|%d0, %1}"
3206 [(set_attr "type" "ssemov")
3207 (set_attr "prefix" "maybe_vex")
3208 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadlps that accepts any nonimmediate
;; operands.  ix86_fixup_binary_operands returns the destination to use
;; for the insn; when that differs from operand 0, the result is copied
;; to operand 0 afterwards.
3210 (define_expand "sse_loadlps_exp"
3211 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3213 (match_operand:V2SF 2 "nonimmediate_operand" "")
3215 (match_operand:V4SF 1 "nonimmediate_operand" "")
3216 (parallel [(const_int 2) (const_int 3)]))))]
3219 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3221 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3223 /* Fix up the destination if needed. */
3224 if (dst != operands[0])
3225 emit_move_insn (operands[0], dst);
;; Combine the V2SF operand 2 with elements 2 and 3 of the V4SF operand
;; 1.  Alternatives: shufps / vshufps with immediate 0xe4, movlps /
;; vmovlps, and a movlps store when the destination is memory.
;; NOTE(review): alternative 3 (vmovlps) constrains operand 2 to "x"
;; while the non-AVX movlps alternative uses "m" -- confirm which is
;; intended.
3230 (define_insn "sse_loadlps"
3231 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3233 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,x,x")
3235 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3236 (parallel [(const_int 2) (const_int 3)]))))]
3239 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3240 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3241 movlps\t{%2, %0|%0, %2}
3242 vmovlps\t{%2, %1, %0|%0, %1, %2}
3243 %vmovlps\t{%2, %0|%0, %2}"
3244 [(set_attr "isa" "noavx,avx,noavx,avx,base")
3245 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3246 (set_attr "length_immediate" "1,1,*,*,*")
3247 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3248 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; movss / vmovss between V4SF registers.  Operand 1 is tied to the
;; destination in the non-AVX alternative and is an independent source
;; in the AVX alternative.
3250 (define_insn "sse_movss"
3251 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3253 (match_operand:V4SF 2 "register_operand" " x,x")
3254 (match_operand:V4SF 1 "register_operand" " 0,x")
3258 movss\t{%2, %0|%0, %2}
3259 vmovss\t{%2, %1, %0|%0, %1, %2}"
3260 [(set_attr "isa" "noavx,avx")
3261 (set_attr "type" "ssemov")
3262 (set_attr "prefix" "orig,vex")
3263 (set_attr "mode" "SF")])
;; Expander that duplicates the SFmode scalar in operand 1 into the V4SF
;; register operand 0.  The preparation code forces operand 1 into a
;; register.  The mode handed to force_reg must be the mode operand 1 is
;; declared with (SFmode), not the V4SF mode of the destination: the
;; register-only *vec_dupv4sf insn matches an SFmode register_operand.
3265 (define_expand "vec_dupv4sf"
3266 [(set (match_operand:V4SF 0 "register_operand" "")
3268 (match_operand:SF 1 "nonimmediate_operand" "")))]
3272 operands[1] = force_reg (SFmode, operands[1]);
;; AVX form of the SF -> V4SF duplicate: vshufps with immediate 0 when
;; operand 1 is a register, vbroadcastss when it is memory.
3275 (define_insn "*vec_dupv4sf_avx"
3276 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3278 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3281 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3282 vbroadcastss\t{%1, %0|%0, %1}"
3283 [(set_attr "type" "sselog1,ssemov")
3284 (set_attr "length_immediate" "1,0")
3285 (set_attr "prefix_extra" "0,1")
3286 (set_attr "prefix" "vex")
3287 (set_attr "mode" "V4SF")])
;; Single-alternative form: operand 1 is tied to the destination register
;; (constraint "0") and shufps with immediate 0 is applied to it in place.
3289 (define_insn "*vec_dupv4sf"
3290 [(set (match_operand:V4SF 0 "register_operand" "=x")
3292 (match_operand:SF 1 "register_operand" "0")))]
3294 "shufps\t{$0, %0, %0|%0, %0, 0}"
3295 [(set_attr "type" "sselog1")
3296 (set_attr "length_immediate" "1")
3297 (set_attr "mode" "V4SF")])
3299 ;; Although insertps takes register source, we prefer
3300 ;; unpcklps with register source since it is shorter.
;; Alternatives, in order: (v)unpcklps when operand 2 is a register,
;; (v)insertps with immediate 0x10 when operand 2 is in memory, %vmovss when
;; operand 2 matches the "C" constraint, then punpckldq and movd for the
;; MMX ("y") register alternatives.
3301 (define_insn "*vec_concatv2sf_sse4_1"
3302 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3304 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3305 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3308 unpcklps\t{%2, %0|%0, %2}
3309 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3310 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3311 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3312 %vmovss\t{%1, %0|%0, %1}
3313 punpckldq\t{%2, %0|%0, %2}
3314 movd\t{%1, %0|%0, %1}"
3315 [(set_attr "isa" "noavx,avx,noavx,avx,base,base,base")
3316 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3317 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3318 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3319 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3320 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3321 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3323 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3324 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3325 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Alternatives, in order: unpcklps for two SSE registers, movss for a
;; memory operand 1 with a "C" operand 2, then punpckldq and movd for the
;; MMX ("y") register alternatives.
3326 (define_insn "*vec_concatv2sf_sse"
3327 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3329 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3330 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3333 unpcklps\t{%2, %0|%0, %2}
3334 movss\t{%1, %0|%0, %1}
3335 punpckldq\t{%2, %0|%0, %2}
3336 movd\t{%1, %0|%0, %1}"
3337 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3338 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF register from two V2SF operands: (v)movlhps when operand 2
;; is a register, (v)movhps when operand 2 is in memory.
3340 (define_insn "*vec_concatv4sf_sse"
3341 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3343 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3344 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3347 movlhps\t{%2, %0|%0, %2}
3348 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3349 movhps\t{%2, %0|%0, %2}
3350 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3351 [(set_attr "isa" "noavx,avx,noavx,avx")
3352 (set_attr "type" "ssemov")
3353 (set_attr "prefix" "orig,vex,orig,vex")
3354 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Expander for the V_128 modes; operands 0 and 1 are handed to
;; ix86_expand_vector_init with a first argument of false.
3356 (define_expand "vec_init<mode>"
3357 [(match_operand:V_128 0 "register_operand" "")
3358 (match_operand 1 "" "")]
3361 ix86_expand_vector_init (false, operands[0], operands[1]);
3365 ;; Avoid combining registers from different units in a single alternative,
3366 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Ten alternatives.  0-2 have the "C" constant as operand 1 and use
;; %vinsertps, %vmov<ssescalarmodesuffix> or %vmovd; 3-4 use (v)movss and
;; 5-6 use (v)pinsrd with a register operand 1; 7-9 have a memory
;; destination and carry "*" for every attribute.
3367 (define_insn "*vec_set<mode>_0_sse4_1"
3368 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3369 "=x,x,x ,x,x,x ,x ,m,m,m")
3371 (vec_duplicate:VI4F_128
3372 (match_operand:<ssescalarmode> 2 "general_operand"
3373 " x,m,*r,x,x,*rm,*rm,x,*r,fF"))
3374 (match_operand:VI4F_128 1 "vector_move_operand"
3375 " C,C,C ,0,x,0 ,x ,0,0 ,0")
3379 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3380 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3381 %vmovd\t{%2, %0|%0, %2}
3382 movss\t{%2, %0|%0, %2}
3383 vmovss\t{%2, %1, %0|%0, %1, %2}
3384 pinsrd\t{$0, %2, %0|%0, %2, 0}
3385 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3389 [(set_attr "isa" "base,base,base,noavx,avx,noavx,avx,base,base,base")
3390 (set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov,sselog,sselog,*,*,*")
3391 (set_attr "prefix_extra" "*,*,*,*,*,1,1,*,*,*")
3392 (set_attr "length_immediate" "*,*,*,*,*,1,1,*,*,*")
3393 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,vex,orig,vex,*,*,*")
3394 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,TI,TI,*,*,*")])
3396 ;; Avoid combining registers from different units in a single alternative,
3397 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Counterpart of the pattern above without VEX-prefixed or SSE4.1
;; templates: the register alternatives emit mov<ssescalarmodesuffix>, movd
;; or movss, and the "mode" attribute lists six alternatives in total.
3398 (define_insn "*vec_set<mode>_0_sse2"
3399 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3402 (vec_duplicate:VI4F_128
3403 (match_operand:<ssescalarmode> 2 "general_operand"
3405 (match_operand:VI4F_128 1 "vector_move_operand"
3410 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3411 movd\t{%2, %0|%0, %2}
3412 movss\t{%2, %0|%0, %2}
3416 [(set_attr "type" "ssemov")
3417 (set_attr "mode" "<ssescalarmode>,SI,SF,*,*,*")])
3419 ;; Avoid combining registers from different units in a single alternative,
3420 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Variant whose two register alternatives both emit movss; the "mode"
;; attribute lists five alternatives in total.
3421 (define_insn "vec_set<mode>_0"
3422 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3425 (vec_duplicate:VI4F_128
3426 (match_operand:<ssescalarmode> 2 "general_operand"
3428 (match_operand:VI4F_128 1 "vector_move_operand"
3433 movss\t{%2, %0|%0, %2}
3434 movss\t{%2, %0|%0, %2}
3438 [(set_attr "type" "ssemov")
3439 (set_attr "mode" "SF,SF,*,*,*")])
3441 ;; A subset is vec_setv4sf.
;; Operand 3 is matched by const_pow2_1_to_8_operand.  Before printing it
;; is rewritten to exact_log2 (operand 3) << 4, which becomes the immediate
;; of insertps (non-AVX) or vinsertps (AVX).
3442 (define_insn "*vec_setv4sf_sse4_1"
3443 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3446 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3447 (match_operand:V4SF 1 "register_operand" "0,x")
3448 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n,n")))]
3451 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3452 switch (which_alternative)
3455 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3457 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3462 [(set_attr "isa" "noavx,avx")
3463 (set_attr "type" "sselog")
3464 (set_attr "prefix_data16" "1,*")
3465 (set_attr "prefix_extra" "1")
3466 (set_attr "length_immediate" "1")
3467 (set_attr "prefix" "orig,vex")
3468 (set_attr "mode" "V4SF")])
;; (v)insertps on V4SF operands with an immediate operand 3 in 0..255.
;; When operand 2 is in memory, the top two bits of the immediate
;; (operand 3 >> 6) are folded into the address as an SFmode reference at
;; byte offset count_s * 4, and the immediate is masked with 0x3f.
3470 (define_insn "sse4_1_insertps"
3471 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3472 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3473 (match_operand:V4SF 1 "register_operand" "0,x")
3474 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3478 if (MEM_P (operands[2]))
3480 unsigned count_s = INTVAL (operands[3]) >> 6;
3482 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3483 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3485 switch (which_alternative)
3488 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3490 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3495 [(set_attr "isa" "noavx,avx")
3496 (set_attr "type" "sselog")
3497 (set_attr "prefix_data16" "1,*")
3498 (set_attr "prefix_extra" "1")
3499 (set_attr "length_immediate" "1")
3500 (set_attr "prefix" "orig,vex")
3501 (set_attr "mode" "V4SF")])
;; Post-reload split (TARGET_SSE && reload_completed) for a VI4F_128 memory
;; destination and a non-memory scalar operand 1: the replacement is a move
;; whose destination is operand 0 re-addressed in <ssescalarmode> at byte
;; offset 0.
3504 [(set (match_operand:VI4F_128 0 "memory_operand" "")
3506 (vec_duplicate:VI4F_128
3507 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3510 "TARGET_SSE && reload_completed"
3513 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; Expander for the V_128 modes: forwards operands 0 and 1 and the integer
;; value of constant operand 2 to ix86_expand_vector_set.
3518 (define_expand "vec_set<mode>"
3519 [(match_operand:V_128 0 "register_operand" "")
3520 (match_operand:<ssescalarmode> 1 "register_operand" "")
3521 (match_operand 2 "const_int_operand" "")]
3524 ix86_expand_vector_set (false, operands[0], operands[1],
3525 INTVAL (operands[2]));
;; Extraction of element 0 of a V4SF; at most one of the two operands may be
;; in memory.  After reload it is split into a plain move: the source is
;; re-expressed in SFmode, either as a hard register with the same number
;; or through gen_lowpart.
3529 (define_insn_and_split "*vec_extractv4sf_0"
3530 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3532 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3533 (parallel [(const_int 0)])))]
3534 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3536 "&& reload_completed"
3539 rtx op1 = operands[1];
3541 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3543 op1 = gen_lowpart (SFmode, op1);
3544 emit_move_insn (operands[0], op1);
;; Expander for the V_256 modes.  Operand 2 (matched by
;; const_0_to_1_operand) selects which generator is used,
;; gen_vec_extract_lo_<mode> or gen_vec_extract_hi_<mode>; the chosen insn
;; is then emitted on operands 0 and 1.
3548 (define_expand "avx_vextractf128<mode>"
3549 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
3550 (match_operand:V_256 1 "register_operand" "")
3551 (match_operand:SI 2 "const_0_to_1_operand" "")]
3554 rtx (*insn)(rtx, rtx);
3556 switch (INTVAL (operands[2]))
3559 insn = gen_vec_extract_lo_<mode>;
3562 insn = gen_vec_extract_hi_<mode>;
3568 emit_insn (insn (operands[0], operands[1]));
;; Elements 0 and 1 of a VI8F_256 vector.  After reload this is split into
;; a move from operand 1 viewed in <ssehalfvecmode> (same hard register
;; number, or gen_lowpart).
3572 (define_insn_and_split "vec_extract_lo_<mode>"
3573 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3574 (vec_select:<ssehalfvecmode>
3575 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
3576 (parallel [(const_int 0) (const_int 1)])))]
3579 "&& reload_completed"
3582 rtx op1 = operands[1];
3584 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3586 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3587 emit_move_insn (operands[0], op1);
;; Elements 2 and 3 of a VI8F_256 register, extracted with vextractf128 and
;; immediate 1 into a register or memory destination.
3591 (define_insn "vec_extract_hi_<mode>"
3592 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3593 (vec_select:<ssehalfvecmode>
3594 (match_operand:VI8F_256 1 "register_operand" "x,x")
3595 (parallel [(const_int 2) (const_int 3)])))]
3597 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3598 [(set_attr "type" "sselog")
3599 (set_attr "prefix_extra" "1")
3600 (set_attr "length_immediate" "1")
3601 (set_attr "memory" "none,store")
3602 (set_attr "prefix" "vex")
3603 (set_attr "mode" "V8SF")])
;; Elements 0-3 of a VI4F_256 vector.  After reload this is split into a
;; move from operand 1 viewed in <ssehalfvecmode> (same hard register
;; number, or gen_lowpart).
3605 (define_insn_and_split "vec_extract_lo_<mode>"
3606 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3607 (vec_select:<ssehalfvecmode>
3608 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
3609 (parallel [(const_int 0) (const_int 1)
3610 (const_int 2) (const_int 3)])))]
3613 "&& reload_completed"
3616 rtx op1 = operands[1];
3618 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3620 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3621 emit_move_insn (operands[0], op1);
;; Elements 4-7 of a VI4F_256 register, extracted with vextractf128 and
;; immediate 1 into a register or memory destination.
3625 (define_insn "vec_extract_hi_<mode>"
3626 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3627 (vec_select:<ssehalfvecmode>
3628 (match_operand:VI4F_256 1 "register_operand" "x,x")
3629 (parallel [(const_int 4) (const_int 5)
3630 (const_int 6) (const_int 7)])))]
3632 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3633 [(set_attr "type" "sselog")
3634 (set_attr "prefix_extra" "1")
3635 (set_attr "length_immediate" "1")
3636 (set_attr "memory" "none,store")
3637 (set_attr "prefix" "vex")
3638 (set_attr "mode" "V8SF")])
;; Elements 0-7 of a V16HI vector as V8HI.  After reload this is split into
;; a move from operand 1 viewed in V8HImode (same hard register number, or
;; gen_lowpart).
3640 (define_insn_and_split "vec_extract_lo_v16hi"
3641 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3643 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
3644 (parallel [(const_int 0) (const_int 1)
3645 (const_int 2) (const_int 3)
3646 (const_int 4) (const_int 5)
3647 (const_int 6) (const_int 7)])))]
3650 "&& reload_completed"
3653 rtx op1 = operands[1];
3655 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
3657 op1 = gen_lowpart (V8HImode, op1);
3658 emit_move_insn (operands[0], op1);
;; Elements 8-15 of a V16HI register as V8HI, extracted with vextractf128
;; and immediate 1 into a register or memory destination.
3662 (define_insn "vec_extract_hi_v16hi"
3663 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3665 (match_operand:V16HI 1 "register_operand" "x,x")
3666 (parallel [(const_int 8) (const_int 9)
3667 (const_int 10) (const_int 11)
3668 (const_int 12) (const_int 13)
3669 (const_int 14) (const_int 15)])))]
3671 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3672 [(set_attr "type" "sselog")
3673 (set_attr "prefix_extra" "1")
3674 (set_attr "length_immediate" "1")
3675 (set_attr "memory" "none,store")
3676 (set_attr "prefix" "vex")
3677 (set_attr "mode" "V8SF")])
;; Elements 0-15 of a V32QI vector as V16QI.  After reload this is split
;; into a move from operand 1 viewed in V16QImode (same hard register
;; number, or gen_lowpart).
3679 (define_insn_and_split "vec_extract_lo_v32qi"
3680 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3682 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
3683 (parallel [(const_int 0) (const_int 1)
3684 (const_int 2) (const_int 3)
3685 (const_int 4) (const_int 5)
3686 (const_int 6) (const_int 7)
3687 (const_int 8) (const_int 9)
3688 (const_int 10) (const_int 11)
3689 (const_int 12) (const_int 13)
3690 (const_int 14) (const_int 15)])))]
3693 "&& reload_completed"
3696 rtx op1 = operands[1];
3698 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
3700 op1 = gen_lowpart (V16QImode, op1);
3701 emit_move_insn (operands[0], op1);
;; Elements 16-31 of a V32QI register as V16QI, extracted with vextractf128
;; and immediate 1 into a register or memory destination.
3705 (define_insn "vec_extract_hi_v32qi"
3706 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3708 (match_operand:V32QI 1 "register_operand" "x,x")
3709 (parallel [(const_int 16) (const_int 17)
3710 (const_int 18) (const_int 19)
3711 (const_int 20) (const_int 21)
3712 (const_int 22) (const_int 23)
3713 (const_int 24) (const_int 25)
3714 (const_int 26) (const_int 27)
3715 (const_int 28) (const_int 29)
3716 (const_int 30) (const_int 31)])))]
3718 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3719 [(set_attr "type" "sselog")
3720 (set_attr "prefix_extra" "1")
3721 (set_attr "length_immediate" "1")
3722 (set_attr "memory" "none,store")
3723 (set_attr "prefix" "vex")
3724 (set_attr "mode" "V8SF")])
;; Extracts the element of V4SF register operand 1 chosen by the constant
;; operand 2 (matched by const_0_to_3_operand) with (v)extractps; the SF
;; destination uses the "rm" constraint.
3726 (define_insn "*sse4_1_extractps"
3727 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
3729 (match_operand:V4SF 1 "register_operand" "x")
3730 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
3732 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
3733 [(set_attr "type" "sselog")
3734 (set_attr "prefix_data16" "1")
3735 (set_attr "prefix_extra" "1")
3736 (set_attr "length_immediate" "1")
3737 (set_attr "prefix" "maybe_vex")
3738 (set_attr "mode" "V4SF")])
;; Extraction from a V4SF memory operand (constraint "o") into a register.
;; The split replaces it with an SFmode move from operand 1 at byte offset
;; 4 * operand 2.
3740 (define_insn_and_split "*vec_extract_v4sf_mem"
3741 [(set (match_operand:SF 0 "register_operand" "=x*rf")
3743 (match_operand:V4SF 1 "memory_operand" "o")
3744 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
3750 int i = INTVAL (operands[2]);
3752 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
3756 ;; Modes handled by vec_extract patterns.
;; The 256-bit float modes V8SF and V4DF are enabled only under TARGET_AVX;
;; the remaining modes are listed unconditionally.
3757 (define_mode_iterator VEC_EXTRACT_MODE
3758 [V16QI V8HI V4SI V2DI
3759 (V8SF "TARGET_AVX") V4SF
3760 (V4DF "TARGET_AVX") V2DF])
;; Expander over VEC_EXTRACT_MODE: forwards operands 0 and 1 and the integer
;; value of constant operand 2 to ix86_expand_vector_extract.
3762 (define_expand "vec_extract<mode>"
3763 [(match_operand:<ssescalarmode> 0 "register_operand" "")
3764 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
3765 (match_operand 2 "const_int_operand" "")]
3768 ix86_expand_vector_extract (false, operands[0], operands[1],
3769 INTVAL (operands[2]));
3773 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3775 ;; Parallel double-precision floating point element swizzling
3777 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3779 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Selection (1 5 3 7) over V4DF operands 1 and 2, emitted as vunpckhpd.
3780 (define_insn "avx_unpckhpd256"
3781 [(set (match_operand:V4DF 0 "register_operand" "=x")
3784 (match_operand:V4DF 1 "register_operand" "x")
3785 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3786 (parallel [(const_int 1) (const_int 5)
3787 (const_int 3) (const_int 7)])))]
3789 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
3790 [(set_attr "type" "sselog")
3791 (set_attr "prefix" "vex")
3792 (set_attr "mode" "V4DF")])
;; Three-step expander.  Two fresh V4DF pseudos (operands 3 and 4) receive
;; the (0 4 2 6) and (1 5 3 7) selections; the final set of operand 0 uses
;; the (2 3 6 7) selection.
3794 (define_expand "vec_interleave_highv4df"
3798 (match_operand:V4DF 1 "register_operand" "x")
3799 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3800 (parallel [(const_int 0) (const_int 4)
3801 (const_int 2) (const_int 6)])))
3807 (parallel [(const_int 1) (const_int 5)
3808 (const_int 3) (const_int 7)])))
3809 (set (match_operand:V4DF 0 "register_operand" "")
3814 (parallel [(const_int 2) (const_int 3)
3815 (const_int 6) (const_int 7)])))]
3818 operands[3] = gen_reg_rtx (V4DFmode);
3819 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF high interleave.  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the operands,
;; operand 2 is forced into a V2DF register.
3823 (define_expand "vec_interleave_highv2df"
3824 [(set (match_operand:V2DF 0 "register_operand" "")
3827 (match_operand:V2DF 1 "nonimmediate_operand" "")
3828 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3829 (parallel [(const_int 1)
3833 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
3834 operands[2] = force_reg (V2DFmode, operands[2]);
;; SSE3 form of the V2DF high interleave.  Alternatives, in order:
;; (v)unpckhpd for registers, %vmovddup from %H1 when operand 2 is tied to
;; operand 1, (v)movlpd from %H1 for a memory operand 1, and %vmovhpd
;; storing operand 1 to a memory destination.
3837 (define_insn "*sse3_interleave_highv2df"
3838 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
3841 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
3842 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
3843 (parallel [(const_int 1)
3845 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
3847 unpckhpd\t{%2, %0|%0, %2}
3848 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
3849 %vmovddup\t{%H1, %0|%0, %H1}
3850 movlpd\t{%H1, %0|%0, %H1}
3851 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
3852 %vmovhpd\t{%1, %0|%0, %1}"
3853 [(set_attr "isa" "noavx,avx,base,noavx,avx,base")
3854 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
3855 (set_attr "prefix_data16" "*,*,*,1,*,1")
3856 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
3857 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; SSE2 form of the V2DF high interleave with three alternatives: unpckhpd
;; for registers, movlpd from %H1 for a memory operand 1, and movhpd storing
;; operand 1 to a memory destination.
3859 (define_insn "*sse2_interleave_highv2df"
3860 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3863 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
3864 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
3865 (parallel [(const_int 1)
3867 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
3869 unpckhpd\t{%2, %0|%0, %2}
3870 movlpd\t{%H1, %0|%0, %H1}
3871 movhpd\t{%1, %0|%0, %1}"
3872 [(set_attr "type" "sselog,ssemov,ssemov")
3873 (set_attr "prefix_data16" "*,1,1")
3874 (set_attr "mode" "V2DF,V1DF,V1DF")])
3876 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander using the (0 4 2 6) selection with a single V4DF source,
;; operand 1.
3877 (define_expand "avx_movddup256"
3878 [(set (match_operand:V4DF 0 "register_operand" "")
3881 (match_operand:V4DF 1 "nonimmediate_operand" "")
3883 (parallel [(const_int 0) (const_int 4)
3884 (const_int 2) (const_int 6)])))]
;; Named expander for the (0 4 2 6) selection over V4DF operands 1 and 2;
;; operand 1 must be a register here.
3887 (define_expand "avx_unpcklpd256"
3888 [(set (match_operand:V4DF 0 "register_operand" "")
3891 (match_operand:V4DF 1 "register_operand" "")
3892 (match_operand:V4DF 2 "nonimmediate_operand" ""))
3893 (parallel [(const_int 0) (const_int 4)
3894 (const_int 2) (const_int 6)])))]
;; Matcher for the (0 4 2 6) selection.  Alternative 0 has operand 2 tied
;; to operand 1 and emits vmovddup; alternative 1 emits vunpcklpd.  The
;; visible part of the condition allows a memory operand 1 only when it is
;; rtx_equal_p to operand 2.
3897 (define_insn "*avx_unpcklpd256"
3898 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
3901 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
3902 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
3903 (parallel [(const_int 0) (const_int 4)
3904 (const_int 2) (const_int 6)])))]
3906 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
3908 vmovddup\t{%1, %0|%0, %1}
3909 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
3910 [(set_attr "type" "sselog")
3911 (set_attr "prefix" "vex")
3912 (set_attr "mode" "V4DF")])
;; Three-step expander.  Two fresh V4DF pseudos (operands 3 and 4) receive
;; the (0 4 2 6) and (1 5 3 7) selections; the final set of operand 0 uses
;; the (0 1 4 5) selection.
3914 (define_expand "vec_interleave_lowv4df"
3918 (match_operand:V4DF 1 "register_operand" "x")
3919 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3920 (parallel [(const_int 0) (const_int 4)
3921 (const_int 2) (const_int 6)])))
3927 (parallel [(const_int 1) (const_int 5)
3928 (const_int 3) (const_int 7)])))
3929 (set (match_operand:V4DF 0 "register_operand" "")
3934 (parallel [(const_int 0) (const_int 1)
3935 (const_int 4) (const_int 5)])))]
3938 operands[3] = gen_reg_rtx (V4DFmode);
3939 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF low interleave.  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the operands,
;; operand 1 is forced into a V2DF register.
3942 (define_expand "vec_interleave_lowv2df"
3943 [(set (match_operand:V2DF 0 "register_operand" "")
3946 (match_operand:V2DF 1 "nonimmediate_operand" "")
3947 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3948 (parallel [(const_int 0)
3952 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
3953 operands[1] = force_reg (V2DFmode, operands[1]);
;; SSE3 form of the V2DF low interleave.  Alternatives, in order:
;; (v)unpcklpd for registers, %vmovddup when operand 2 is tied to a memory
;; operand 1, (v)movhpd for a memory operand 2, and %vmovlpd storing
;; operand 2 to %H0 of a memory destination.
3956 (define_insn "*sse3_interleave_lowv2df"
3957 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
3960 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
3961 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
3962 (parallel [(const_int 0)
3964 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
3966 unpcklpd\t{%2, %0|%0, %2}
3967 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
3968 %vmovddup\t{%1, %0|%0, %1}
3969 movhpd\t{%2, %0|%0, %2}
3970 vmovhpd\t{%2, %1, %0|%0, %1, %2}
3971 %vmovlpd\t{%2, %H0|%H0, %2}"
3972 [(set_attr "isa" "noavx,avx,base,noavx,avx,base")
3973 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
3974 (set_attr "prefix_data16" "*,*,*,1,*,1")
3975 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
3976 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; SSE2 form of the V2DF low interleave with operand 1 always tied to the
;; destination: unpcklpd for a register operand 2, movhpd for a memory
;; operand 2, and movlpd storing operand 2 to %H0 of a memory destination.
3978 (define_insn "*sse2_interleave_lowv2df"
3979 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
3982 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
3983 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
3984 (parallel [(const_int 0)
3986 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
3988 unpcklpd\t{%2, %0|%0, %2}
3989 movhpd\t{%2, %0|%0, %2}
3990 movlpd\t{%2, %H0|%H0, %2}"
3991 [(set_attr "type" "sselog,ssemov,ssemov")
3992 (set_attr "prefix_data16" "*,1,1")
3993 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Post-reload split (TARGET_SSE3 && reload_completed) for a V2DF memory
;; destination and register operand 1: the DFmode view of operand 1's hard
;; register is stored to operand 0 at byte offsets 0 and 8.
3996 [(set (match_operand:V2DF 0 "memory_operand" "")
3999 (match_operand:V2DF 1 "register_operand" "")
4001 (parallel [(const_int 0)
4003 "TARGET_SSE3 && reload_completed"
4006 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4007 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4008 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split for a V2DF memory source when operand 3 equals operand 2 + 2
;; (TARGET_SSE3): the result is a vec_duplicate of the DF found at byte
;; offset operand 2 * 8 within memory operand 1.
4013 [(set (match_operand:V2DF 0 "register_operand" "")
4016 (match_operand:V2DF 1 "memory_operand" "")
4018 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4019 (match_operand:SI 3 "const_int_operand" "")])))]
4020 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4021 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4023 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander that decodes the integer mask in operand 3 into explicit
;; selector operands for avx_shufpd256_1: mask bit 1 picks 5 or 4, bit 2
;; picks 3 or 2, bit 3 picks 7 or 6.
4026 (define_expand "avx_shufpd256"
4027 [(match_operand:V4DF 0 "register_operand" "")
4028 (match_operand:V4DF 1 "register_operand" "")
4029 (match_operand:V4DF 2 "nonimmediate_operand" "")
4030 (match_operand:SI 3 "const_int_operand" "")]
4033 int mask = INTVAL (operands[3]);
4034 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4036 GEN_INT (mask & 2 ? 5 : 4),
4037 GEN_INT (mask & 4 ? 3 : 2),
4038 GEN_INT (mask & 8 ? 7 : 6)));
;; Matcher with four explicit selector operands (3..6).  The output code
;; rebuilds the vshufpd immediate as
;;   op3 | (op4 - 4) << 1 | (op5 - 2) << 2 | (op6 - 6) << 3
;; and stores it back into operand 3 for printing.
4042 (define_insn "avx_shufpd256_1"
4043 [(set (match_operand:V4DF 0 "register_operand" "=x")
4046 (match_operand:V4DF 1 "register_operand" "x")
4047 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4048 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4049 (match_operand 4 "const_4_to_5_operand" "")
4050 (match_operand 5 "const_2_to_3_operand" "")
4051 (match_operand 6 "const_6_to_7_operand" "")])))]
4055 mask = INTVAL (operands[3]);
4056 mask |= (INTVAL (operands[4]) - 4) << 1;
4057 mask |= (INTVAL (operands[5]) - 2) << 2;
4058 mask |= (INTVAL (operands[6]) - 6) << 3;
4059 operands[3] = GEN_INT (mask);
4061 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4063 [(set_attr "type" "sselog")
4064 (set_attr "length_immediate" "1")
4065 (set_attr "prefix" "vex")
4066 (set_attr "mode" "V4DF")])
;; Expander that decodes the integer mask in operand 3 into selector
;; operands for gen_sse2_shufpd_v2df; mask bit 1 picks 3 or 2 for the last
;; selector.
4068 (define_expand "sse2_shufpd"
4069 [(match_operand:V2DF 0 "register_operand" "")
4070 (match_operand:V2DF 1 "register_operand" "")
4071 (match_operand:V2DF 2 "nonimmediate_operand" "")
4072 (match_operand:SI 3 "const_int_operand" "")]
4075 int mask = INTVAL (operands[3]);
4076 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4078 GEN_INT (mask & 2 ? 3 : 2)));
4082 ;; Modes handled by vec_extract_even/odd pattern.
;; V4SF is unconditional; the integer modes and V2DF require TARGET_SSE2,
;; and the 256-bit float modes require TARGET_AVX.
4083 (define_mode_iterator VEC_EXTRACT_EVENODD_MODE
4084 [(V16QI "TARGET_SSE2")
4085 (V8HI "TARGET_SSE2")
4086 (V4SI "TARGET_SSE2")
4087 (V2DI "TARGET_SSE2")
4088 (V8SF "TARGET_AVX") V4SF
4089 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Expander over VEC_EXTRACT_EVENODD_MODE: calls
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
4091 (define_expand "vec_extract_even<mode>"
4092 [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "")
4093 (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "")
4094 (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")]
4097 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; Expander over VEC_EXTRACT_EVENODD_MODE: calls
;; ix86_expand_vec_extract_even_odd with a final argument of 1.
4101 (define_expand "vec_extract_odd<mode>"
4102 [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "")
4103 (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "")
4104 (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")]
4107 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4111 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; V2DI high interleave: punpckhqdq with operand 1 tied to the destination,
;; or the three-operand vpunpckhqdq under AVX.
4113 (define_insn "vec_interleave_highv2di"
4114 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4117 (match_operand:V2DI 1 "register_operand" "0,x")
4118 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4119 (parallel [(const_int 1)
4123 punpckhqdq\t{%2, %0|%0, %2}
4124 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4125 [(set_attr "isa" "noavx,avx")
4126 (set_attr "type" "sselog")
4127 (set_attr "prefix_data16" "1,*")
4128 (set_attr "prefix" "orig,vex")
4129 (set_attr "mode" "TI")])
;; V2DI low interleave: punpcklqdq with operand 1 tied to the destination,
;; or the three-operand vpunpcklqdq under AVX.
4131 (define_insn "vec_interleave_lowv2di"
4132 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4135 (match_operand:V2DI 1 "register_operand" "0,x")
4136 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4137 (parallel [(const_int 0)
4141 punpcklqdq\t{%2, %0|%0, %2}
4142 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4143 [(set_attr "isa" "noavx,avx")
4144 (set_attr "type" "sselog")
4145 (set_attr "prefix_data16" "1,*")
4146 (set_attr "prefix" "orig,vex")
4147 (set_attr "mode" "TI")])
;; Two-element shuffle over the VI8F_128 modes with explicit selector
;; operands 3 (0..1) and 4 (2..3).  The output code rebuilds the immediate
;; as op3 | (op4 - 2) << 1 and prints shufpd or vshufpd depending on the
;; alternative.
4149 (define_insn "sse2_shufpd_<mode>"
4150 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4151 (vec_select:VI8F_128
4152 (vec_concat:<ssedoublevecmode>
4153 (match_operand:VI8F_128 1 "register_operand" "0,x")
4154 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4155 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4156 (match_operand 4 "const_2_to_3_operand" "")])))]
4160 mask = INTVAL (operands[3]);
4161 mask |= (INTVAL (operands[4]) - 2) << 1;
4162 operands[3] = GEN_INT (mask);
4164 switch (which_alternative)
4167 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4169 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4174 [(set_attr "isa" "noavx,avx")
4175 (set_attr "type" "sselog")
4176 (set_attr "length_immediate" "1")
4177 (set_attr "prefix" "orig,vex")
4178 (set_attr "mode" "V2DF")])
4180 ;; Avoid combining registers from different units in a single alternative,
4181 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Element 1 of a V2DF as a DF value; at most one operand may be in memory.
;; Six alternatives: a memory destination (%vmovhpd), two register-to-
;; register forms (non-AVX and AVX, the latter vunpckhpd), and three loads
;; from an "o" memory source into SSE, x87 or general registers.
;; prefix_data16 is computed from alternative 0 and !TARGET_AVX.
4182 (define_insn "sse2_storehpd"
4183 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4185 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4186 (parallel [(const_int 1)])))]
4187 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4189 %vmovhpd\t{%1, %0|%0, %1}
4191 vunpckhpd\t{%d1, %0|%0, %d1}
4195 [(set_attr "isa" "base,noavx,avx,base,base,base")
4196 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4197 (set (attr "prefix_data16")
4199 (and (eq_attr "alternative" "0")
4200 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
4202 (const_string "*")))
4203 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4204 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Post-reload split: element 1 of a V2DF in memory becomes a plain DFmode
;; load from operand 1 at byte offset 8.
4207 [(set (match_operand:DF 0 "register_operand" "")
4209 (match_operand:V2DF 1 "memory_operand" "")
4210 (parallel [(const_int 1)])))]
4211 "TARGET_SSE2 && reload_completed"
4212 [(set (match_dup 0) (match_dup 1))]
4213 "operands[1] = adjust_address (operands[1], DFmode, 8);")
4215 ;; Avoid combining registers from different units in a single alternative,
4216 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Element 0 of a V2DF as a DF value; at most one operand may be in memory.
;; Alternative 0 stores to memory with %vmovlpd; the other four
;; alternatives target SSE, x87 or general registers.
4217 (define_insn "sse2_storelpd"
4218 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4220 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4221 (parallel [(const_int 0)])))]
4222 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4224 %vmovlpd\t{%1, %0|%0, %1}
4229 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4230 (set_attr "prefix_data16" "1,*,*,*,*")
4231 (set_attr "prefix" "maybe_vex")
4232 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload split: element 0 of a V2DF becomes a plain move from
;; operand 1 viewed in DFmode (same hard register number, or gen_lowpart).
4235 [(set (match_operand:DF 0 "register_operand" "")
4237 (match_operand:V2DF 1 "nonimmediate_operand" "")
4238 (parallel [(const_int 0)])))]
4239 "TARGET_SSE2 && reload_completed"
4242 rtx op1 = operands[1];
4244 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4246 op1 = gen_lowpart (DFmode, op1);
4247 emit_move_insn (operands[0], op1);
;; Expander wrapper around sse2_loadhpd.  The operands are first passed
;; through ix86_fixup_binary_operands, which returns the destination to
;; use; if that differs from operand 0, the result is copied into operand 0
;; afterwards.
4251 (define_expand "sse2_loadhpd_exp"
4252 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4255 (match_operand:V2DF 1 "nonimmediate_operand" "")
4256 (parallel [(const_int 0)]))
4257 (match_operand:DF 2 "nonimmediate_operand" "")))]
4260 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4262 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4264 /* Fix up the destination if needed. */
4265 if (dst != operands[0])
4266 emit_move_insn (operands[0], dst);
4271 ;; Avoid combining registers from different units in a single alternative,
4272 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines element 0 of V2DF operand 1 with DF operand 2.  Alternatives:
;; (v)movhpd for a memory operand 2, (v)unpcklpd for a register operand 2,
;; shufpd $1 when operand 2 is tied to the destination, and three forms
;; with an "o" memory destination.
;; NOTE(review): in the shufpd $1 alternative the destination holds
;; operand 2, so the instruction appears to yield {dest[1], op1[0]} rather
;; than {op1[0], op2} -- confirm against the SHUFPD definition.
4273 (define_insn "sse2_loadhpd"
4274 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4275 "=x,x,x,x,x,o,o ,o")
4278 (match_operand:V2DF 1 "nonimmediate_operand"
4279 " 0,x,0,x,x,0,0 ,0")
4280 (parallel [(const_int 0)]))
4281 (match_operand:DF 2 "nonimmediate_operand"
4282 " m,m,x,x,0,x,*f,r")))]
4283 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4285 movhpd\t{%2, %0|%0, %2}
4286 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4287 unpcklpd\t{%2, %0|%0, %2}
4288 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4289 shufpd\t{$1, %1, %0|%0, %1, 1}
4293 [(set_attr "isa" "noavx,avx,noavx,avx,noavx,base,base,base")
4294 (set_attr "type" "ssemov,ssemov,sselog,sselog,sselog,ssemov,fmov,imov")
4295 (set_attr "prefix_data16" "1,*,*,*,*,*,*,*")
4296 (set_attr "length_immediate" "*,*,*,*,1,*,*,*")
4297 (set_attr "prefix" "orig,vex,orig,vex,orig,*,*,*")
4298 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload split: a V2DF memory destination that keeps its own
;; element 0 and takes DF register operand 1 after it reduces to a DFmode
;; store of operand 1 at byte offset 8.
4301 [(set (match_operand:V2DF 0 "memory_operand" "")
4303 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4304 (match_operand:DF 1 "register_operand" "")))]
4305 "TARGET_SSE2 && reload_completed"
4306 [(set (match_dup 0) (match_dup 1))]
4307 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd.  The operands are first passed
;; through ix86_fixup_binary_operands, which returns the destination to
;; use; if that differs from operand 0, the result is copied into operand 0
;; afterwards.
4309 (define_expand "sse2_loadlpd_exp"
4310 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4312 (match_operand:DF 2 "nonimmediate_operand" "")
4314 (match_operand:V2DF 1 "nonimmediate_operand" "")
4315 (parallel [(const_int 1)]))))]
4318 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4320 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4322 /* Fix up the destination if needed. */
4323 if (dst != operands[0])
4324 emit_move_insn (operands[0], dst);
4329 ;; Avoid combining registers from different units in a single alternative,
4330 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines DF operand 2 with element 1 of V2DF operand 1.  Eleven
;; alternatives: %vmovsd when operand 1 is the "C" constant, (v)movlpd for
;; a memory operand 2, (v)movsd for a register operand 2, shufpd $2 when
;; operand 2 is tied to the destination, (v)movhpd from %H1 for an "o"
;; memory operand 1, and three forms with a memory destination.
4331 (define_insn "sse2_loadlpd"
4332 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4333 "=x,x,x,x,x,x,x,x,m,m ,m")
4335 (match_operand:DF 2 "nonimmediate_operand"
4336 " m,m,m,x,x,0,0,x,x,*f,r")
4338 (match_operand:V2DF 1 "vector_move_operand"
4339 " C,0,x,0,x,x,o,o,0,0 ,0")
4340 (parallel [(const_int 1)]))))]
4341 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4343 %vmovsd\t{%2, %0|%0, %2}
4344 movlpd\t{%2, %0|%0, %2}
4345 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4346 movsd\t{%2, %0|%0, %2}
4347 vmovsd\t{%2, %1, %0|%0, %1, %2}
4348 shufpd\t{$2, %1, %0|%0, %1, 2}
4349 movhpd\t{%H1, %0|%0, %H1}
4350 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4354 [(set_attr "isa" "base,noavx,avx,noavx,avx,noavx,noavx,avx,base,base,base")
4355 (set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov,fmov,imov")
4356 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4357 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4358 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4359 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Post-reload split: a V2DF memory destination that takes DF register
;; operand 1 first and keeps its own element 1 reduces to a DFmode store of
;; operand 1.  Operand 1 replaces element 0, so the store goes to byte
;; offset 0; offset 8 would overwrite the element 1 that must be preserved.
4362 [(set (match_operand:V2DF 0 "memory_operand" "")
4364 (match_operand:DF 1 "register_operand" "")
4365 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4366 "TARGET_SSE2 && reload_completed"
4367 [(set (match_dup 0) (match_dup 1))]
4368 "operands[0] = adjust_address (operands[0], DFmode, 0);")
4370 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; Element 1 of a V2DF when only SSE (not SSE2) is available: movhps to
;; memory, movhlps between registers, movlps from %H1 for a memory source.
4372 (define_insn "*vec_extractv2df_1_sse"
4373 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4375 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4376 (parallel [(const_int 1)])))]
4377 "!TARGET_SSE2 && TARGET_SSE
4378 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4380 movhps\t{%1, %0|%0, %1}
4381 movhlps\t{%1, %0|%0, %1}
4382 movlps\t{%H1, %0|%0, %H1}"
4383 [(set_attr "type" "ssemov")
4384 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; *vec_extractv2df_0_sse: DF extraction of element 0 (parallel [(const_int 0)])
;; from a V2DF operand, for targets with TARGET_SSE but without TARGET_SSE2,
;; and only when source and destination are not both memory.
;; Alternatives: movlps to memory, movaps register to register, movlps
;; from memory.
4386 (define_insn "*vec_extractv2df_0_sse"
4387 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4389 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4390 (parallel [(const_int 0)])))]
4391 "!TARGET_SSE2 && TARGET_SSE
4392 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4394 movlps\t{%1, %0|%0, %1}
4395 movaps\t{%1, %0|%0, %1}
4396 movlps\t{%1, %0|%0, %1}"
4397 [(set_attr "type" "ssemov")
4398 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse2_movsd: V2DF pattern combining V2DF operands 2 and 1 (the combining
;; rtx and the insn condition are not visible in this listing).
;; Nine alternatives, in template order: movsd / vmovsd between registers,
;; movlpd / vmovlpd with operand 2 in memory, %vmovlpd storing operand 2 to a
;; memory destination, shufpd with immediate 2, movhps / vmovhps reading the
;; %H1 part of an offsettable operand 1, and %vmovhps writing the %H0 part of
;; an offsettable destination.
;; "prefix_data16" is computed rather than listed: the visible test covers
;; alternatives 2 and 4 when TARGET_AVX is 0.
4400 (define_insn "sse2_movsd"
4401 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4403 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4404 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4408 movsd\t{%2, %0|%0, %2}
4409 vmovsd\t{%2, %1, %0|%0, %1, %2}
4410 movlpd\t{%2, %0|%0, %2}
4411 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4412 %vmovlpd\t{%2, %0|%0, %2}
4413 shufpd\t{$2, %1, %0|%0, %1, 2}
4414 movhps\t{%H1, %0|%0, %H1}
4415 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4416 %vmovhps\t{%1, %H0|%H0, %1}"
4417 [(set_attr "isa" "noavx,avx,noavx,avx,base,noavx,noavx,avx,base")
4418 (set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4419 (set (attr "prefix_data16")
4421 (and (eq_attr "alternative" "2,4")
4422 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
4424 (const_string "*")))
4425 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4426 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4427 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; *vec_dupv2df_sse3: V2DF register result from a DF operand in a register or
;; memory ("xm"), emitted as %vmovddup.  The rtx operator and the insn
;; condition are not visible in this listing; the name suggests a duplicate
;; (broadcast) gated on SSE3 -- confirm.
4429 (define_insn "*vec_dupv2df_sse3"
4430 [(set (match_operand:V2DF 0 "register_operand" "=x")
4432 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4434 "%vmovddup\t{%1, %0|%0, %1}"
4435 [(set_attr "type" "sselog1")
4436 (set_attr "prefix" "maybe_vex")
4437 (set_attr "mode" "DF")])
;; vec_dupv2df: V2DF register result from a DF register operand that is tied
;; to the destination (constraint "0").  The rtx operator, condition and
;; output template are not visible in this listing.
4439 (define_insn "vec_dupv2df"
4440 [(set (match_operand:V2DF 0 "register_operand" "=x")
4442 (match_operand:DF 1 "register_operand" "0")))]
4445 [(set_attr "type" "sselog1")
4446 (set_attr "mode" "V2DF")])
;; *vec_concatv2df_sse3: V2DF register result from DF operand 1 (register or
;; memory), emitted as %vmovddup.  Only operand 1 appears in the visible
;; lines; the rest of the rtx and the condition are elided in this listing.
4448 (define_insn "*vec_concatv2df_sse3"
4449 [(set (match_operand:V2DF 0 "register_operand" "=x")
4451 (match_operand:DF 1 "nonimmediate_operand" "xm")
4454 "%vmovddup\t{%1, %0|%0, %1}"
4455 [(set_attr "type" "sselog1")
4456 (set_attr "prefix" "maybe_vex")
4457 (set_attr "mode" "DF")])
;; *vec_concatv2df: V2DF register result formed from DF operands 1 and 2
;; (the combining rtx and the condition are elided in this listing).
;; Seven alternatives, in template order:
;;   0/1  unpcklpd / vunpcklpd  - both operands in registers
;;   2/3  movhpd / vmovhpd      - operand 2 in memory
;;   4    %vmovsd               - operand 1 in memory, operand 2 matches "C"
;;   5    movlhps               - "x" registers, operand 1 tied to dest
;;   6    movhps                - operand 2 in memory, operand 1 tied to dest
;; Alternatives 0, 2 and 4 use the "Y2" register constraint while 5 and 6 use
;; plain "x" with V4SF/V2SF modes (presumably the non-SSE2 fallbacks --
;; confirm against the constraint definitions).
4459 (define_insn "*vec_concatv2df"
4460 [(set (match_operand:V2DF 0 "register_operand" "=Y2,x,Y2,x,Y2,x,x")
4462 (match_operand:DF 1 "nonimmediate_operand" " 0 ,x,0 ,x,m ,0,0")
4463 (match_operand:DF 2 "vector_move_operand" " Y2,x,m ,m,C ,x,m")))]
4466 unpcklpd\t{%2, %0|%0, %2}
4467 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4468 movhpd\t{%2, %0|%0, %2}
4469 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4470 %vmovsd\t{%1, %0|%0, %1}
4471 movlhps\t{%2, %0|%0, %2}
4472 movhps\t{%2, %0|%0, %2}"
4473 [(set_attr "isa" "noavx,avx,noavx,avx,base,noavx,noavx")
4474 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov,ssemov,ssemov")
4475 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4476 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4477 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,DF,V4SF,V2SF")])
4479 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4481 ;; Parallel integral arithmetic
4483 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; neg<mode>2: expander for the VI_128 modes.  The preparation statement
;; creates operand 2 as a register holding the all-zero constant of the
;; vector mode; the rtx that uses it and the condition are elided in this
;; listing (presumably a subtraction of operand 1 from zero -- confirm).
4485 (define_expand "neg<mode>2"
4486 [(set (match_operand:VI_128 0 "register_operand" "")
4489 (match_operand:VI_128 1 "nonimmediate_operand" "")))]
4491 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; <plusminus_insn><mode>3: named expander for the VI_128 modes.  Both
;; inputs may be registers or memory; the operands are legitimized with
;; ix86_fixup_binary_operands_no_copy before the insn is generated.
;; (The rtx code line and the condition are elided in this listing.)
4493 (define_expand "<plusminus_insn><mode>3"
4494 [(set (match_operand:VI_128 0 "register_operand" "")
4496 (match_operand:VI_128 1 "nonimmediate_operand" "")
4497 (match_operand:VI_128 2 "nonimmediate_operand" "")))]
4499 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; *<plusminus_insn><mode>3: matching insn for the VI_128 modes, valid for
;; TARGET_SSE2 when ix86_binary_operator_ok accepts the operands.
;; Alternative 0 (noavx) is the two-operand form with operand 1 tied to the
;; destination; alternative 1 (avx) is the three-operand VEX form.
4501 (define_insn "*<plusminus_insn><mode>3"
4502 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
4504 (match_operand:VI_128 1 "nonimmediate_operand" "<comm>0,x")
4505 (match_operand:VI_128 2 "nonimmediate_operand" "xm,xm")))]
4506 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4508 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4509 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4510 [(set_attr "isa" "noavx,avx")
4511 (set_attr "type" "sseiadd")
4512 (set_attr "prefix_data16" "1,*")
4513 (set_attr "prefix" "orig,vex")
4514 (set_attr "mode" "TI")])
;; sse2_<plusminus_insn><mode>3: expander for the sat_plusminus codes on the
;; VI12_128 modes.  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.  (Condition elided in this listing.)
4516 (define_expand "sse2_<plusminus_insn><mode>3"
4517 [(set (match_operand:VI12_128 0 "register_operand" "")
4518 (sat_plusminus:VI12_128
4519 (match_operand:VI12_128 1 "nonimmediate_operand" "")
4520 (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
4522 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; *sse2_<plusminus_insn><mode>3: insn for the sat_plusminus codes on the
;; VI12_128 modes, valid for TARGET_SSE2 when ix86_binary_operator_ok accepts
;; the operands.  Alternative 0 is the two-operand non-AVX form, alternative
;; 1 the three-operand VEX form.
4524 (define_insn "*sse2_<plusminus_insn><mode>3"
4525 [(set (match_operand:VI12_128 0 "register_operand" "=x,x")
4526 (sat_plusminus:VI12_128
4527 (match_operand:VI12_128 1 "nonimmediate_operand" "<comm>0,x")
4528 (match_operand:VI12_128 2 "nonimmediate_operand" "xm,xm")))]
4529 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4531 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4532 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4533 [(set_attr "isa" "noavx,avx")
4534 (set_attr "type" "sseiadd")
4535 (set_attr "prefix_data16" "1,*")
4536 (set_attr "prefix" "orig,vex")
4537 (set_attr "mode" "TI")])
;; mulv16qi3: V16QI multiply of two register operands, implemented as an
;; insn-and-split that is only valid while new pseudos can still be created
;; (can_create_pseudo_p; the first part of the condition is elided in this
;; listing).  The split allocates six V16QI temporaries, widens both inputs
;; by interleaving each with itself (high and low halves separately),
;; multiplies the halves as V8HI, and finally keeps the even bytes of the two
;; products.
4539 (define_insn_and_split "mulv16qi3"
4540 [(set (match_operand:V16QI 0 "register_operand" "")
4541 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
4542 (match_operand:V16QI 2 "register_operand" "")))]
4544 && can_create_pseudo_p ()"
4552 for (i = 0; i < 6; ++i)
4553 t[i] = gen_reg_rtx (V16QImode);
4555 /* Unpack data such that we've got a source byte in each low byte of
4556 each word. We don't care what goes into the high byte of each word.
4557 Rather than trying to get zero in there, most convenient is to let
4558 it be a copy of the low byte. */
4559 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
4560 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
4561 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
4562 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
4564 /* Multiply words. The end-of-line annotations here give a picture of what
4565 the output of that instruction looks like. Dot means don't care; the
4566 letters are the bytes of the result with A being the most significant. */
4567 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
4568 gen_lowpart (V8HImode, t[0]),
4569 gen_lowpart (V8HImode, t[1])));
4570 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
4571 gen_lowpart (V8HImode, t[2]),
4572 gen_lowpart (V8HImode, t[3])));
4574 /* Extract the even bytes and merge them back together. */
4575 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; mulv8hi3: named expander for the V8HI multiply.  Inputs may be registers
;; or memory and are legitimized with ix86_fixup_binary_operands_no_copy.
;; (Condition elided in this listing.)
4579 (define_expand "mulv8hi3"
4580 [(set (match_operand:V8HI 0 "register_operand" "")
4581 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
4582 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4584 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; *mulv8hi3: V8HI multiply insn for TARGET_SSE2, emitted as pmullw
;; (two-operand, operand 1 tied to the destination) or vpmullw (VEX,
;; three-operand).  The "%" on operand 1 marks the operands as commutative.
4586 (define_insn "*mulv8hi3"
4587 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
4588 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
4589 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
4590 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4592 pmullw\t{%2, %0|%0, %2}
4593 vpmullw\t{%2, %1, %0|%0, %1, %2}"
4594 [(set_attr "isa" "noavx,avx")
4595 (set_attr "type" "sseimul")
4596 (set_attr "prefix_data16" "1,*")
4597 (set_attr "prefix" "orig,vex")
4598 (set_attr "mode" "TI")])
;; <s>mulv8hi3_highpart: expander producing a V8HI result from two V8HI
;; inputs (register or memory), legitimized with
;; ix86_fixup_binary_operands_no_copy.  Most of the rtx (the extension,
;; multiply and shift/truncate wrappers) and the condition are elided in this
;; listing; only the operands are visible.
4600 (define_expand "<s>mulv8hi3_highpart"
4601 [(set (match_operand:V8HI 0 "register_operand" "")
4606 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4608 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4611 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; *<s>mulv8hi3_highpart: insn for the high-part V8HI multiply, valid for
;; TARGET_SSE2; emitted as pmulh<u>w or its VEX form vpmulh<u>w.  The rtx
;; wrappers around the operands are elided in this listing.
4613 (define_insn "*<s>mulv8hi3_highpart"
4614 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
4619 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
4621 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
4623 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4625 pmulh<u>w\t{%2, %0|%0, %2}
4626 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
4627 [(set_attr "isa" "noavx,avx")
4628 (set_attr "type" "sseimul")
4629 (set_attr "prefix_data16" "1,*")
4630 (set_attr "prefix" "orig,vex")
4631 (set_attr "mode" "TI")])
;; sse2_umulv2siv2di3: expander yielding a V2DI result from elements 0 and 2
;; of each V4SI input (see the two parallel selectors).  The extension and
;; multiply rtx lines and the condition are elided in this listing.
;; Operands are legitimized with ix86_fixup_binary_operands_no_copy.
4633 (define_expand "sse2_umulv2siv2di3"
4634 [(set (match_operand:V2DI 0 "register_operand" "")
4638 (match_operand:V4SI 1 "nonimmediate_operand" "")
4639 (parallel [(const_int 0) (const_int 2)])))
4642 (match_operand:V4SI 2 "nonimmediate_operand" "")
4643 (parallel [(const_int 0) (const_int 2)])))))]
4645 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; *sse2_umulv2siv2di3: insn for the widening multiply of elements 0 and 2 of
;; two V4SI operands into V2DI, valid for TARGET_SSE2; emitted as pmuludq or
;; vpmuludq.
4647 (define_insn "*sse2_umulv2siv2di3"
4648 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4652 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4653 (parallel [(const_int 0) (const_int 2)])))
4656 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
4657 (parallel [(const_int 0) (const_int 2)])))))]
4658 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4660 pmuludq\t{%2, %0|%0, %2}
4661 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
4662 [(set_attr "isa" "noavx,avx")
4663 (set_attr "type" "sseimul")
4664 (set_attr "prefix_data16" "1,*")
4665 (set_attr "prefix" "orig,vex")
4666 (set_attr "mode" "TI")])
;; sse4_1_mulv2siv2di3: expander with the same operand shape as
;; sse2_umulv2siv2di3 (elements 0 and 2 of each V4SI input, V2DI result).
;; The extension/multiply rtx lines and the condition are elided in this
;; listing.  Operands are legitimized with ix86_fixup_binary_operands_no_copy.
4668 (define_expand "sse4_1_mulv2siv2di3"
4669 [(set (match_operand:V2DI 0 "register_operand" "")
4673 (match_operand:V4SI 1 "nonimmediate_operand" "")
4674 (parallel [(const_int 0) (const_int 2)])))
4677 (match_operand:V4SI 2 "nonimmediate_operand" "")
4678 (parallel [(const_int 0) (const_int 2)])))))]
4680 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; *sse4_1_mulv2siv2di3: insn for the widening multiply of elements 0 and 2
;; of two V4SI operands into V2DI, valid for TARGET_SSE4_1; emitted as pmuldq
;; or vpmuldq.
4682 (define_insn "*sse4_1_mulv2siv2di3"
4683 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4687 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4688 (parallel [(const_int 0) (const_int 2)])))
4691 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
4692 (parallel [(const_int 0) (const_int 2)])))))]
4693 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4695 pmuldq\t{%2, %0|%0, %2}
4696 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
4697 [(set_attr "isa" "noavx,avx")
4698 (set_attr "type" "sseimul")
4699 (set_attr "prefix_data16" "1,*")
4700 (set_attr "prefix_extra" "1")
4701 (set_attr "prefix" "orig,vex")
4702 (set_attr "mode" "TI")])
;; sse2_pmaddwd: expander producing a V4SI result from two V8HI inputs.  The
;; visible rtx takes V4HI selections of each operand, one set beginning at
;; element 0 and one beginning at element 1 (operands reused via match_dup);
;; the remaining selector entries, the arithmetic wrappers and the condition
;; are elided in this listing.  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
4704 (define_expand "sse2_pmaddwd"
4705 [(set (match_operand:V4SI 0 "register_operand" "")
4710 (match_operand:V8HI 1 "nonimmediate_operand" "")
4711 (parallel [(const_int 0)
4717 (match_operand:V8HI 2 "nonimmediate_operand" "")
4718 (parallel [(const_int 0)
4724 (vec_select:V4HI (match_dup 1)
4725 (parallel [(const_int 1)
4730 (vec_select:V4HI (match_dup 2)
4731 (parallel [(const_int 1)
4734 (const_int 7)]))))))]
4736 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; *sse2_pmaddwd: insn matching the sse2_pmaddwd rtx shape, valid for
;; TARGET_SSE2; emitted as pmaddwd or vpmaddwd.  Scheduling attributes:
;; type sseiadd, atom_unit simul.
4738 (define_insn "*sse2_pmaddwd"
4739 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
4744 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
4745 (parallel [(const_int 0)
4751 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
4752 (parallel [(const_int 0)
4758 (vec_select:V4HI (match_dup 1)
4759 (parallel [(const_int 1)
4764 (vec_select:V4HI (match_dup 2)
4765 (parallel [(const_int 1)
4768 (const_int 7)]))))))]
4769 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4771 pmaddwd\t{%2, %0|%0, %2}
4772 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
4773 [(set_attr "isa" "noavx,avx")
4774 (set_attr "type" "sseiadd")
4775 (set_attr "atom_unit" "simul")
4776 (set_attr "prefix_data16" "1,*")
4777 (set_attr "prefix" "orig,vex")
4778 (set_attr "mode" "TI")])
;; mulv4si3: named expander for the V4SI multiply with register operands.
;; When TARGET_SSE4_1 or TARGET_AVX is set the operands are legitimized with
;; ix86_fixup_binary_operands_no_copy; nothing else is done in the visible
;; preparation code.  (Condition elided in this listing.)
4780 (define_expand "mulv4si3"
4781 [(set (match_operand:V4SI 0 "register_operand" "")
4782 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
4783 (match_operand:V4SI 2 "register_operand" "")))]
4786 if (TARGET_SSE4_1 || TARGET_AVX)
4787 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; *sse4_1_mulv4si3: V4SI multiply insn for TARGET_SSE4_1, emitted as pmulld
;; (two-operand) or vpmulld (VEX, three-operand).
4790 (define_insn "*sse4_1_mulv4si3"
4791 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
4792 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4793 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")))]
4794 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4796 pmulld\t{%2, %0|%0, %2}
4797 vpmulld\t{%2, %1, %0|%0, %1, %2}"
4798 [(set_attr "isa" "noavx,avx")
4799 (set_attr "type" "sseimul")
4800 (set_attr "prefix_extra" "1")
4801 (set_attr "prefix" "orig,vex")
4802 (set_attr "mode" "TI")])
;; *sse2_mulv4si3: V4SI multiply for TARGET_SSE2 targets that have neither
;; SSE4.1 nor AVX.  It is an insn-and-split that is only valid while pseudos
;; can still be created; the split builds the result from two
;; sse2_umulv2siv2di3 products (elements 2/0, then elements 3/1 after both
;; inputs have been shifted down as V1TI values), shuffles each product with
;; sse2_pshufd_1 and merges them with vec_interleave_lowv4si.
;; Several argument lines of the emitted calls (including the shift counts,
;; presumably the otherwise-unused "thirtytwo") are elided in this listing.
4804 (define_insn_and_split "*sse2_mulv4si3"
4805 [(set (match_operand:V4SI 0 "register_operand" "")
4806 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
4807 (match_operand:V4SI 2 "register_operand" "")))]
4808 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
4809 && can_create_pseudo_p ()"
4814 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
4820 t1 = gen_reg_rtx (V4SImode);
4821 t2 = gen_reg_rtx (V4SImode);
4822 t3 = gen_reg_rtx (V4SImode);
4823 t4 = gen_reg_rtx (V4SImode);
4824 t5 = gen_reg_rtx (V4SImode);
4825 t6 = gen_reg_rtx (V4SImode);
4826 thirtytwo = GEN_INT (32);
4828 /* Multiply elements 2 and 0. */
4829 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
4832 /* Shift both input vectors down one element, so that elements 3
4833 and 1 are now in the slots for elements 2 and 0. For K8, at
4834 least, this is faster than using a shuffle. */
4835 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
4836 gen_lowpart (V1TImode, op1),
4838 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
4839 gen_lowpart (V1TImode, op2),
4841 /* Multiply elements 3 and 1. */
4842 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
4845 /* Move the results in element 2 down to element 1; we don't care
4846 what goes in elements 2 and 3. */
4847 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
4848 const0_rtx, const0_rtx));
4849 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
4850 const0_rtx, const0_rtx));
4852 /* Merge the parts back together. */
4853 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; mulv2di3: V2DI multiply of two register operands, implemented as an
;; insn-and-split that is only valid while pseudos can still be created
;; (the first part of the condition is elided in this listing).
;; Two expansion sequences are visible; the test that selects between them is
;; elided.  The first uses the xop_* generators (pshufd, V4SI multiply,
;; xop_phadddq, shift left by 32, xop_pmacsdql).  The second is built from
;; three sse2_umulv2siv2di3 products: low*low, plus the two cross products
;; (each input's low part times the other input shifted right by 32), each
;; cross product shifted left by 32 before the final additions.
4857 (define_insn_and_split "mulv2di3"
4858 [(set (match_operand:V2DI 0 "register_operand" "")
4859 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
4860 (match_operand:V2DI 2 "register_operand" "")))]
4862 && can_create_pseudo_p ()"
4867 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
4876 /* op1: A,B,C,D, op2: E,F,G,H */
4877 op1 = gen_lowpart (V4SImode, op1);
4878 op2 = gen_lowpart (V4SImode, op2);
4880 t1 = gen_reg_rtx (V4SImode);
4881 t2 = gen_reg_rtx (V4SImode);
4882 t3 = gen_reg_rtx (V2DImode);
4883 t4 = gen_reg_rtx (V2DImode);
4886 emit_insn (gen_sse2_pshufd_1 (t1, op1,
4892 /* t2: (B*E),(A*F),(D*G),(C*H) */
4893 emit_insn (gen_mulv4si3 (t2, t1, op2));
4895 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
4896 emit_insn (gen_xop_phadddq (t3, t2));
4898 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
4899 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
4901 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
4902 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
4906 t1 = gen_reg_rtx (V2DImode);
4907 t2 = gen_reg_rtx (V2DImode);
4908 t3 = gen_reg_rtx (V2DImode);
4909 t4 = gen_reg_rtx (V2DImode);
4910 t5 = gen_reg_rtx (V2DImode);
4911 t6 = gen_reg_rtx (V2DImode);
4912 thirtytwo = GEN_INT (32);
4914 /* Multiply low parts. */
4915 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
4916 gen_lowpart (V4SImode, op2)));
4918 /* Shift input vectors right 32 bits so we can multiply high parts. */
4919 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
4920 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
4922 /* Multiply high parts by low parts. */
4923 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
4924 gen_lowpart (V4SImode, t3)));
4925 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
4926 gen_lowpart (V4SImode, t2)));
4928 /* Shift them back. */
4929 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
4930 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
4932 /* Add the three parts together. */
4933 emit_insn (gen_addv2di3 (t6, t1, t4));
4934 emit_insn (gen_addv2di3 (op0, t6, t5));
;; vec_widen_smult_hi_v8hi: expander taking two V8HI registers and writing a
;; V4SI register.  It computes the V8HI product (mulv8hi3) into t1 and the
;; signed high part (smulv8hi3_highpart) into t2, then interleaves the high
;; halves of t1 and t2 into the destination viewed as V8HI.
;; (The op1/op2 assignments and the condition are elided in this listing.)
4939 (define_expand "vec_widen_smult_hi_v8hi"
4940 [(match_operand:V4SI 0 "register_operand" "")
4941 (match_operand:V8HI 1 "register_operand" "")
4942 (match_operand:V8HI 2 "register_operand" "")]
4945 rtx op1, op2, t1, t2, dest;
4949 t1 = gen_reg_rtx (V8HImode);
4950 t2 = gen_reg_rtx (V8HImode);
4951 dest = gen_lowpart (V8HImode, operands[0]);
4953 emit_insn (gen_mulv8hi3 (t1, op1, op2));
4954 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
4955 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; vec_widen_smult_lo_v8hi: as vec_widen_smult_hi_v8hi, but the final step
;; interleaves the low halves of the product (t1) and the signed high part
;; (t2) into the V4SI destination viewed as V8HI.
;; (The op1/op2 assignments and the condition are elided in this listing.)
4959 (define_expand "vec_widen_smult_lo_v8hi"
4960 [(match_operand:V4SI 0 "register_operand" "")
4961 (match_operand:V8HI 1 "register_operand" "")
4962 (match_operand:V8HI 2 "register_operand" "")]
4965 rtx op1, op2, t1, t2, dest;
4969 t1 = gen_reg_rtx (V8HImode);
4970 t2 = gen_reg_rtx (V8HImode);
4971 dest = gen_lowpart (V8HImode, operands[0]);
4973 emit_insn (gen_mulv8hi3 (t1, op1, op2));
4974 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
4975 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; vec_widen_umult_hi_v8hi: expander taking two V8HI registers and writing a
;; V4SI register.  It computes the V8HI product (mulv8hi3) into t1 and the
;; unsigned high part (umulv8hi3_highpart) into t2, then interleaves the high
;; halves of t1 and t2 into the destination viewed as V8HI.
;; (The op1/op2 assignments and the condition are elided in this listing.)
4979 (define_expand "vec_widen_umult_hi_v8hi"
4980 [(match_operand:V4SI 0 "register_operand" "")
4981 (match_operand:V8HI 1 "register_operand" "")
4982 (match_operand:V8HI 2 "register_operand" "")]
4985 rtx op1, op2, t1, t2, dest;
4989 t1 = gen_reg_rtx (V8HImode);
4990 t2 = gen_reg_rtx (V8HImode);
4991 dest = gen_lowpart (V8HImode, operands[0]);
4993 emit_insn (gen_mulv8hi3 (t1, op1, op2));
4994 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
4995 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; vec_widen_umult_lo_v8hi: as vec_widen_umult_hi_v8hi, but the final step
;; interleaves the low halves of the product (t1) and the unsigned high part
;; (t2) into the V4SI destination viewed as V8HI.
;; (The op1/op2 assignments and the condition are elided in this listing.)
4999 (define_expand "vec_widen_umult_lo_v8hi"
5000 [(match_operand:V4SI 0 "register_operand" "")
5001 (match_operand:V8HI 1 "register_operand" "")
5002 (match_operand:V8HI 2 "register_operand" "")]
5005 rtx op1, op2, t1, t2, dest;
5009 t1 = gen_reg_rtx (V8HImode);
5010 t2 = gen_reg_rtx (V8HImode);
5011 dest = gen_lowpart (V8HImode, operands[0]);
5013 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5014 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5015 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; vec_widen_smult_hi_v4si: expander taking two V4SI registers and writing a
;; V2DI register.  Each input is first shuffled with sse2_pshufd_1 into a
;; fresh V4SI temporary (the selector arguments are elided in this listing),
;; then xop_mulv2div2di3_high combines the two temporaries.
;; (The condition is elided; the xop_* generator suggests an XOP-only
;; expander -- confirm.)
5019 (define_expand "vec_widen_smult_hi_v4si"
5020 [(match_operand:V2DI 0 "register_operand" "")
5021 (match_operand:V4SI 1 "register_operand" "")
5022 (match_operand:V4SI 2 "register_operand" "")]
5027 t1 = gen_reg_rtx (V4SImode);
5028 t2 = gen_reg_rtx (V4SImode);
5030 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5035 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5040 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; vec_widen_smult_lo_v4si: as vec_widen_smult_hi_v4si, but the shuffled
;; temporaries are combined with xop_mulv2div2di3_low.
;; (The pshufd selector arguments and the condition are elided in this
;; listing.)
5044 (define_expand "vec_widen_smult_lo_v4si"
5045 [(match_operand:V2DI 0 "register_operand" "")
5046 (match_operand:V4SI 1 "register_operand" "")
5047 (match_operand:V4SI 2 "register_operand" "")]
5052 t1 = gen_reg_rtx (V4SImode);
5053 t2 = gen_reg_rtx (V4SImode);
5055 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5060 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5065 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; vec_widen_umult_hi_v4si: expander taking two V4SI registers and writing a
;; V2DI register.  Each input is interleaved with itself (high halves) so
;; that its upper elements occupy slots 0 and 2, the slots that
;; sse2_umulv2siv2di3 multiplies.
;; (The op1/op2 assignments and the condition are elided in this listing.)
5069 (define_expand "vec_widen_umult_hi_v4si"
5070 [(match_operand:V2DI 0 "register_operand" "")
5071 (match_operand:V4SI 1 "register_operand" "")
5072 (match_operand:V4SI 2 "register_operand" "")]
5075 rtx op1, op2, t1, t2;
5079 t1 = gen_reg_rtx (V4SImode);
5080 t2 = gen_reg_rtx (V4SImode);
5082 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5083 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5084 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; vec_widen_umult_lo_v4si: as vec_widen_umult_hi_v4si, but each input is
;; interleaved with itself using the low halves before the
;; sse2_umulv2siv2di3 multiply.
;; (The op1/op2 assignments and the condition are elided in this listing.)
5088 (define_expand "vec_widen_umult_lo_v4si"
5089 [(match_operand:V2DI 0 "register_operand" "")
5090 (match_operand:V4SI 1 "register_operand" "")
5091 (match_operand:V4SI 2 "register_operand" "")]
5094 rtx op1, op2, t1, t2;
5098 t1 = gen_reg_rtx (V4SImode);
5099 t2 = gen_reg_rtx (V4SImode);
5101 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5102 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5103 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; sdot_prodv8hi: expander with V8HI inputs (operands 1 and 2), a V4SI
;; accumulator (operand 3) and a V4SI result.  It emits sse2_pmaddwd into a
;; fresh V4SI temporary and then adds the accumulator with addv4si3.
;; (Condition elided in this listing.)
5107 (define_expand "sdot_prodv8hi"
5108 [(match_operand:V4SI 0 "register_operand" "")
5109 (match_operand:V8HI 1 "register_operand" "")
5110 (match_operand:V8HI 2 "register_operand" "")
5111 (match_operand:V4SI 3 "register_operand" "")]
5114 rtx t = gen_reg_rtx (V4SImode);
5115 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5116 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; udot_prodv4si: expander with V4SI inputs (operands 1 and 2), a V2DI
;; accumulator (operand 3) and a V2DI result.  Steps visible here:
;;   t1 = sse2_umulv2siv2di3 (op1, op2) + op3
;;   t2, t3 = op1, op2 shifted right as V1TI values (count elided in this
;;            listing) so the remaining elements reach the multiplied slots
;;   t4 = sse2_umulv2siv2di3 (t2, t3)
;;   result = t1 + t4
;; (Declarations and the condition are elided in this listing.)
5120 (define_expand "udot_prodv4si"
5121 [(match_operand:V2DI 0 "register_operand" "")
5122 (match_operand:V4SI 1 "register_operand" "")
5123 (match_operand:V4SI 2 "register_operand" "")
5124 (match_operand:V2DI 3 "register_operand" "")]
5129 t1 = gen_reg_rtx (V2DImode);
5130 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5131 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5133 t2 = gen_reg_rtx (V4SImode);
5134 t3 = gen_reg_rtx (V4SImode);
5135 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5136 gen_lowpart (V1TImode, operands[1]),
5138 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5139 gen_lowpart (V1TImode, operands[2]),
5142 t4 = gen_reg_rtx (V2DImode);
5143 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5145 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; ashr<mode>3: shift insn for the VI24_128 modes, emitted as
;; psra<ssemodesuffix> or its VEX form.  The count (operand 2, SImode) may be
;; an "x" register or a constant matching "N".  "length_immediate" depends on
;; whether the count is a const_int (the value for that case is elided in
;; this listing; otherwise "0").  The rtx code and condition are also elided.
5149 (define_insn "ashr<mode>3"
5150 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
5152 (match_operand:VI24_128 1 "register_operand" "0,x")
5153 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5156 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5157 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5158 [(set_attr "isa" "noavx,avx")
5159 (set_attr "type" "sseishft")
5160 (set (attr "length_immediate")
5161 (if_then_else (match_operand 2 "const_int_operand" "")
5163 (const_string "0")))
5164 (set_attr "prefix_data16" "1,*")
5165 (set_attr "prefix" "orig,vex")
5166 (set_attr "mode" "TI")])
;; lshr<mode>3: shift insn for the VI248_128 modes, emitted as
;; psrl<ssemodesuffix> or its VEX form.  The count (operand 2, SImode) may be
;; an "x" register or a constant matching "N".  "length_immediate" depends on
;; whether the count is a const_int (the value for that case is elided in
;; this listing; otherwise "0").  The rtx code and condition are also elided.
5168 (define_insn "lshr<mode>3"
5169 [(set (match_operand:VI248_128 0 "register_operand" "=x,x")
5171 (match_operand:VI248_128 1 "register_operand" "0,x")
5172 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5175 psrl<ssemodesuffix>\t{%2, %0|%0, %2}
5176 vpsrl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5177 [(set_attr "isa" "noavx,avx")
5178 (set_attr "type" "sseishft")
5179 (set (attr "length_immediate")
5180 (if_then_else (match_operand 2 "const_int_operand" "")
5182 (const_string "0")))
5183 (set_attr "prefix_data16" "1,*")
5184 (set_attr "prefix" "orig,vex")
5185 (set_attr "mode" "TI")])
;; ashl<mode>3: shift insn for the VI248_128 modes, emitted as
;; psll<ssemodesuffix> or its VEX form.  The count (operand 2, SImode) may be
;; an "x" register or a constant matching "N".  "length_immediate" depends on
;; whether the count is a const_int (the value for that case is elided in
;; this listing; otherwise "0").  The rtx code and condition are also elided.
5187 (define_insn "ashl<mode>3"
5188 [(set (match_operand:VI248_128 0 "register_operand" "=x,x")
5190 (match_operand:VI248_128 1 "register_operand" "0,x")
5191 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5194 psll<ssemodesuffix>\t{%2, %0|%0, %2}
5195 vpsll<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5196 [(set_attr "isa" "noavx,avx")
5197 (set_attr "type" "sseishft")
5198 (set (attr "length_immediate")
5199 (if_then_else (match_operand 2 "const_int_operand" "")
5201 (const_string "0")))
5202 (set_attr "prefix_data16" "1,*")
5203 (set_attr "prefix" "orig,vex")
5204 (set_attr "mode" "TI")])
;; vec_shl_<mode>: whole-vector shift expander for the VI_128 modes.  The
;; count must satisfy const_0_to_255_mul_8_operand.  The preparation code
;; re-views operands 0 and 1 as V1TImode so that the V1TI shift insn can
;; match.  (The rtx code and condition are elided in this listing.)
5206 (define_expand "vec_shl_<mode>"
5207 [(set (match_operand:VI_128 0 "register_operand" "")
5209 (match_operand:VI_128 1 "register_operand" "")
5210 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5213 operands[0] = gen_lowpart (V1TImode, operands[0]);
5214 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; sse2_ashlv1ti3: V1TI shift insn whose count (operand 2) is a constant
;; accepted by const_0_to_255_mul_8_operand.  The output code divides the
;; count by 8 before printing and emits pslldq (alternative with operand 1
;; tied to the destination) or vpslldq (three-operand VEX form).
;; (The rtx code, condition and switch labels are elided in this listing.)
5217 (define_insn "sse2_ashlv1ti3"
5218 [(set (match_operand:V1TI 0 "register_operand" "=x,x")
5220 (match_operand:V1TI 1 "register_operand" "0,x")
5221 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5224 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5226 switch (which_alternative)
5229 return "pslldq\t{%2, %0|%0, %2}";
5231 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5236 [(set_attr "isa" "noavx,avx")
5237 (set_attr "type" "sseishft")
5238 (set_attr "length_immediate" "1")
5239 (set_attr "prefix_data16" "1,*")
5240 (set_attr "prefix" "orig,vex")
5241 (set_attr "mode" "TI")])
;; vec_shr_<mode>: whole-vector shift expander for the VI_128 modes.  The
;; count must satisfy const_0_to_255_mul_8_operand.  The preparation code
;; re-views operands 0 and 1 as V1TImode so that the V1TI shift insn can
;; match.  (The rtx code and condition are elided in this listing.)
5243 (define_expand "vec_shr_<mode>"
5244 [(set (match_operand:VI_128 0 "register_operand" "")
5246 (match_operand:VI_128 1 "register_operand" "")
5247 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5250 operands[0] = gen_lowpart (V1TImode, operands[0]);
5251 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; sse2_lshrv1ti3: V1TI shift insn whose count (operand 2) is a constant
;; accepted by const_0_to_255_mul_8_operand.  The output code divides the
;; count by 8 before printing and emits psrldq (alternative with operand 1
;; tied to the destination) or vpsrldq (three-operand VEX form).
;; (The rtx code, condition and switch labels are elided in this listing.)
5254 (define_insn "sse2_lshrv1ti3"
5255 [(set (match_operand:V1TI 0 "register_operand" "=x,x")
5257 (match_operand:V1TI 1 "register_operand" "0,x")
5258 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5261 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5263 switch (which_alternative)
5266 return "psrldq\t{%2, %0|%0, %2}";
5268 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5273 [(set_attr "isa" "noavx,avx")
5274 (set_attr "type" "sseishft")
5275 (set_attr "length_immediate" "1")
5276 (set_attr "atom_unit" "sishuf")
5277 (set_attr "prefix_data16" "1,*")
5278 (set_attr "prefix" "orig,vex")
5279 (set_attr "mode" "TI")])
;; *sse4_1_<code><mode>3 (VI14_128 variant): commutative two-input insn for
;; TARGET_SSE4_1, emitted as p<maxmin_int><ssemodesuffix> or its VEX form.
;; The rtx code line is elided in this listing; judging by the template it is
;; one of the integer max/min codes -- confirm.
5281 (define_insn "*sse4_1_<code><mode>3"
5282 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
5284 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
5285 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
5286 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5288 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5289 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5290 [(set_attr "isa" "noavx,avx")
5291 (set_attr "type" "sseiadd")
5292 (set_attr "prefix_extra" "1,*")
5293 (set_attr "prefix" "orig,vex")
5294 (set_attr "mode" "TI")])
;; *<code>v8hi3: commutative two-input V8HI insn for TARGET_SSE2, emitted as
;; p<maxmin_int>w or its VEX form.  The rtx code line is elided in this
;; listing; judging by the template it is one of the integer max/min codes
;; -- confirm.
5296 (define_insn "*<code>v8hi3"
5297 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5299 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5300 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
5301 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5303 p<maxmin_int>w\t{%2, %0|%0, %2}
5304 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
5305 [(set_attr "isa" "noavx,avx")
5306 (set_attr "type" "sseiadd")
5307 (set_attr "prefix_data16" "1,*")
5308 (set_attr "prefix_extra" "*,1")
5309 (set_attr "prefix" "orig,vex")
5310 (set_attr "mode" "TI")])
;; smax<mode>3: signed-maximum expander for the VI14_128 modes.  Two paths
;; are visible (the test choosing between them is elided in this listing):
;; one only legitimizes the operands for SMAX so the insn pattern can match;
;; the other builds a conditional-move request for ix86_expand_int_vcond:
;; destination, the two candidate values (op1, op2), and the comparison
;; op1 > op2 together with its operands.
;; NOTE(review): xops[1]/xops[2] are presumably the values chosen when the
;; comparison is true/false -- confirm against ix86_expand_int_vcond.
5312 (define_expand "smax<mode>3"
5313 [(set (match_operand:VI14_128 0 "register_operand" "")
5314 (smax:VI14_128 (match_operand:VI14_128 1 "register_operand" "")
5315 (match_operand:VI14_128 2 "register_operand" "")))]
5319 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
5325 xops[0] = operands[0];
5326 xops[1] = operands[1];
5327 xops[2] = operands[2];
5328 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5329 xops[4] = operands[1];
5330 xops[5] = operands[2];
5331 ok = ix86_expand_int_vcond (xops);
;; smin<mode>3: signed-minimum expander for the VI14_128 modes.  Two paths
;; are visible (the test choosing between them is elided in this listing):
;; one only legitimizes the operands for SMIN; the other calls
;; ix86_expand_int_vcond with the same op1 > op2 comparison used by the smax
;; expander but with the two candidate values swapped (op2, op1).
;; NOTE(review): xops[1]/xops[2] are presumably the values chosen when the
;; comparison is true/false -- confirm against ix86_expand_int_vcond.
5337 (define_expand "smin<mode>3"
5338 [(set (match_operand:VI14_128 0 "register_operand" "")
5339 (smin:VI14_128 (match_operand:VI14_128 1 "register_operand" "")
5340 (match_operand:VI14_128 2 "register_operand" "")))]
5344 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
5350 xops[0] = operands[0];
5351 xops[1] = operands[2];
5352 xops[2] = operands[1];
5353 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5354 xops[4] = operands[1];
5355 xops[5] = operands[2];
5356 ok = ix86_expand_int_vcond (xops);
;; <code>v8hi3: named expander for the V8HI form; inputs may be registers or
;; memory and are legitimized with ix86_fixup_binary_operands_no_copy.
;; (The rtx code line and the condition are elided in this listing.)
5362 (define_expand "<code>v8hi3"
5363 [(set (match_operand:V8HI 0 "register_operand" "")
5365 (match_operand:V8HI 1 "nonimmediate_operand" "")
5366 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5368 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; smaxv2di3: signed-maximum expander for V2DI.  It always goes through
;; ix86_expand_int_vcond with candidates (op1, op2) and the comparison
;; op1 > op2.  (Declarations and the condition are elided in this listing.)
;; NOTE(review): xops[1]/xops[2] are presumably the values chosen when the
;; comparison is true/false -- confirm against ix86_expand_int_vcond.
5370 (define_expand "smaxv2di3"
5371 [(set (match_operand:V2DI 0 "register_operand" "")
5372 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
5373 (match_operand:V2DI 2 "register_operand" "")))]
5379 xops[0] = operands[0];
5380 xops[1] = operands[1];
5381 xops[2] = operands[2];
5382 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5383 xops[4] = operands[1];
5384 xops[5] = operands[2];
5385 ok = ix86_expand_int_vcond (xops);
;; sminv2di3: signed-minimum expander for V2DI.  It goes through
;; ix86_expand_int_vcond with the op1 > op2 comparison and the candidate
;; values swapped (op2, op1) relative to smaxv2di3.
;; (Declarations and the condition are elided in this listing.)
5390 (define_expand "sminv2di3"
5391 [(set (match_operand:V2DI 0 "register_operand" "")
5392 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
5393 (match_operand:V2DI 2 "register_operand" "")))]
5399 xops[0] = operands[0];
5400 xops[1] = operands[2];
5401 xops[2] = operands[1];
5402 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5403 xops[4] = operands[1];
5404 xops[5] = operands[2];
5405 ok = ix86_expand_int_vcond (xops);
;; *sse4_1_<code><mode>3 (VI24_128 variant): commutative two-input insn for
;; TARGET_SSE4_1, emitted as p<maxmin_int><ssemodesuffix> or its VEX form.
;; The rtx code line is elided in this listing; given the neighbouring
;; umax/umin expanders it is presumably the unsigned max/min iterator --
;; confirm.
5410 (define_insn "*sse4_1_<code><mode>3"
5411 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
5413 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
5414 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
5415 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5417 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5418 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5419 [(set_attr "isa" "noavx,avx")
5420 (set_attr "type" "sseiadd")
5421 (set_attr "prefix_extra" "1,*")
5422 (set_attr "prefix" "orig,vex")
5423 (set_attr "mode" "TI")])
;; *<code>v16qi3: commutative two-input V16QI insn for TARGET_SSE2, emitted
;; as p<maxmin_int>b or its VEX form.  The rtx code line is elided in this
;; listing; judging by the template it is one of the integer max/min codes
;; -- confirm.
5425 (define_insn "*<code>v16qi3"
5426 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5428 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
5429 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
5430 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
5432 p<maxmin_int>b\t{%2, %0|%0, %2}
5433 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
5434 [(set_attr "isa" "noavx,avx")
5435 (set_attr "type" "sseiadd")
5436 (set_attr "prefix_data16" "1,*")
5437 (set_attr "prefix_extra" "*,1")
5438 (set_attr "prefix" "orig,vex")
5439 (set_attr "mode" "TI")])
;; <code>v16qi3: named expander for the V16QI form; inputs may be registers
;; or memory and are legitimized with ix86_fixup_binary_operands_no_copy.
;; (The rtx code line and the condition are elided in this listing.)
5441 (define_expand "<code>v16qi3"
5442 [(set (match_operand:V16QI 0 "register_operand" "")
5444 (match_operand:V16QI 1 "nonimmediate_operand" "")
5445 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
5447 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; umaxv8hi3: unsigned-maximum expander for V8HI.  Two paths are visible (the
;; test choosing between them is elided in this listing): one only
;; legitimizes the operands for UMAX so an insn pattern can match; the other
;; synthesizes the maximum as (op1 -us op2) + op2, using the unsigned
;; saturating subtract sse2_ussubv8hi3 followed by addv8hi3.
5449 (define_expand "umaxv8hi3"
5450 [(set (match_operand:V8HI 0 "register_operand" "")
5451 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
5452 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5456 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
5459 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
/* op2 is read again by the final add, so the intermediate difference must
   not be written over it when the destination and op2 are the same rtx.  */
5460 if (rtx_equal_p (op3, op2))
5461 op3 = gen_reg_rtx (V8HImode);
5462 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
5463 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; umaxv4si3: unsigned-maximum expander for V4SI.  Two paths are visible (the
;; test choosing between them is elided in this listing): one only
;; legitimizes the operands for UMAX; the other calls ix86_expand_int_vcond
;; with candidates (op1, op2) and the unsigned comparison op1 >u op2.
;; NOTE(review): xops[1]/xops[2] are presumably the values chosen when the
;; comparison is true/false -- confirm against ix86_expand_int_vcond.
5468 (define_expand "umaxv4si3"
5469 [(set (match_operand:V4SI 0 "register_operand" "")
5470 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
5471 (match_operand:V4SI 2 "register_operand" "")))]
5475 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
5481 xops[0] = operands[0];
5482 xops[1] = operands[1];
5483 xops[2] = operands[2];
5484 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5485 xops[4] = operands[1];
5486 xops[5] = operands[2];
5487 ok = ix86_expand_int_vcond (xops);
;; umin<mode>3: unsigned-minimum expander for the VI24_128 modes.  Two paths
;; are visible (the test choosing between them is elided in this listing):
;; one only legitimizes the operands for UMIN; the other calls
;; ix86_expand_int_vcond with the unsigned comparison op1 >u op2 and the
;; candidate values swapped (op2, op1) relative to the umax expanders.
5493 (define_expand "umin<mode>3"
5494 [(set (match_operand:VI24_128 0 "register_operand" "")
5495 (umin:VI24_128 (match_operand:VI24_128 1 "register_operand" "")
5496 (match_operand:VI24_128 2 "register_operand" "")))]
5500 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
5506 xops[0] = operands[0];
5507 xops[1] = operands[2];
5508 xops[2] = operands[1];
5509 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5510 xops[4] = operands[1];
5511 xops[5] = operands[2];
5512 ok = ix86_expand_int_vcond (xops);
;; umaxv2di3: unsigned-maximum expander for V2DI.  It always goes through
;; ix86_expand_int_vcond with candidates (op1, op2) and the unsigned
;; comparison op1 >u op2.
;; (Declarations and the condition are elided in this listing.)
5518 (define_expand "umaxv2di3"
5519 [(set (match_operand:V2DI 0 "register_operand" "")
5520 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
5521 (match_operand:V2DI 2 "register_operand" "")))]
5527 xops[0] = operands[0];
5528 xops[1] = operands[1];
5529 xops[2] = operands[2];
5530 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5531 xops[4] = operands[1];
5532 xops[5] = operands[2];
5533 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum of two V2DI vectors, expressed as the conditional select
;; "op1 GTU op2 ? op2 : op1" (value slots swapped relative to the maximum)
;; and expanded through ix86_expand_int_vcond.
5538 (define_expand "uminv2di3"
5539 [(set (match_operand:V2DI 0 "register_operand" "")
5540 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
5541 (match_operand:V2DI 2 "register_operand" "")))]
5547 xops[0] = operands[0];
5548 xops[1] = operands[2];
5549 xops[2] = operands[1];
5550 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5551 xops[4] = operands[1];
5552 xops[5] = operands[2];
5553 ok = ix86_expand_int_vcond (xops);
5558 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5560 ;; Parallel integral comparisons
5562 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DI element-wise equality compare, available with SSE4.1.
;; Alternative 0 is the two-operand pcmpeqq (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form vpcmpeqq.
;; The "%" on operand 1 marks operands 1 and 2 as commutative.
5564 (define_insn "*sse4_1_eqv2di3"
5565 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5567 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
5568 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
5569 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
5571 pcmpeqq\t{%2, %0|%0, %2}
5572 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
5573 [(set_attr "isa" "noavx,avx")
5574 (set_attr "type" "ssecmp")
5575 (set_attr "prefix_extra" "1")
5576 (set_attr "prefix" "orig,vex")
5577 (set_attr "mode" "TI")])
;; Element-wise equality compare for the VI124_128 modes.  Enabled for SSE2
;; when XOP is not in use.  Alternative 0 prints pcmpeq<ssemodesuffix> with
;; operand 1 tied to the destination; alternative 1 prints the three-operand
;; vpcmpeq<ssemodesuffix>.  Operands 1 and 2 are commutative ("%").
5579 (define_insn "*sse2_eq<mode>3"
5580 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
5582 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
5583 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
5584 "TARGET_SSE2 && !TARGET_XOP
5585 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5587 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
5588 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5589 [(set_attr "isa" "noavx,avx")
5590 (set_attr "type" "ssecmp")
5591 (set_attr "prefix_data16" "1,*")
5592 (set_attr "prefix" "orig,vex")
5593 (set_attr "mode" "TI")])
;; Named expander for the VI124_128 equality compare (SSE2, XOP not in use).
;; It only normalizes the operands with ix86_fixup_binary_operands_no_copy
;; for EQ before the template is emitted.
5595 (define_expand "sse2_eq<mode>3"
5596 [(set (match_operand:VI124_128 0 "register_operand" "")
5598 (match_operand:VI124_128 1 "nonimmediate_operand" "")
5599 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
5600 "TARGET_SSE2 && !TARGET_XOP "
5601 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for the V2DI equality compare.  It only normalizes the
;; operands with ix86_fixup_binary_operands_no_copy for EQ.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5603 (define_expand "sse4_1_eqv2di3"
5604 [(set (match_operand:V2DI 0 "register_operand" "")
5606 (match_operand:V2DI 1 "nonimmediate_operand" "")
5607 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
5609 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; V2DI element-wise compare emitted as pcmpgtq (operand 1 tied to the
;; destination) or the three-operand vpcmpgtq.  Unlike the equality
;; patterns, operand 1 must be a register and carries no "%" marker, so the
;; operands are not interchangeable.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5611 (define_insn "sse4_2_gtv2di3"
5612 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5614 (match_operand:V2DI 1 "register_operand" "0,x")
5615 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
5618 pcmpgtq\t{%2, %0|%0, %2}
5619 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
5620 [(set_attr "isa" "noavx,avx")
5621 (set_attr "type" "ssecmp")
5622 (set_attr "prefix_extra" "1")
5623 (set_attr "prefix" "orig,vex")
5624 (set_attr "mode" "TI")])
;; Element-wise compare for the VI124_128 modes, emitted as
;; pcmpgt<ssemodesuffix> (operand 1 tied to the destination) or the
;; three-operand vpcmpgt<ssemodesuffix>.  Enabled for SSE2 when XOP is not
;; in use.  Operand 1 must be a register; the operands are not commutative.
5626 (define_insn "sse2_gt<mode>3"
5627 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
5629 (match_operand:VI124_128 1 "register_operand" "0,x")
5630 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
5631 "TARGET_SSE2 && !TARGET_XOP"
5633 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
5634 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5635 [(set_attr "isa" "noavx,avx")
5636 (set_attr "type" "ssecmp")
5637 (set_attr "prefix_data16" "1,*")
5638 (set_attr "prefix" "orig,vex")
5639 (set_attr "mode" "TI")])
;; Vector conditional select for the VI124_128 modes:
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; Operator 3 has an empty predicate, so any comparison code is accepted at
;; this level; the actual expansion is delegated to ix86_expand_int_vcond,
;; whose success flag is captured in `ok'.
5641 (define_expand "vcond<mode>"
5642 [(set (match_operand:VI124_128 0 "register_operand" "")
5643 (if_then_else:VI124_128
5644 (match_operator 3 ""
5645 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
5646 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
5647 (match_operand:VI124_128 1 "general_operand" "")
5648 (match_operand:VI124_128 2 "general_operand" "")))]
5651 bool ok = ix86_expand_int_vcond (operands);
;; V2DI variant of the vector conditional select; operand numbering matches
;; the other vcond patterns (3 = comparison on operands 4 and 5, 1/2 = the
;; selected values).  Expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5656 (define_expand "vcondv2di"
5657 [(set (match_operand:V2DI 0 "register_operand" "")
5659 (match_operator 3 ""
5660 [(match_operand:V2DI 4 "nonimmediate_operand" "")
5661 (match_operand:V2DI 5 "nonimmediate_operand" "")])
5662 (match_operand:V2DI 1 "general_operand" "")
5663 (match_operand:V2DI 2 "general_operand" "")))]
5666 bool ok = ix86_expand_int_vcond (operands);
;; Conditional select for the VI124_128 modes under the "vcondu" name
;; (presumably the unsigned-comparison entry point -- the visible template
;; is identical to vcond<mode>):
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; Expansion is delegated to ix86_expand_int_vcond.
5671 (define_expand "vcondu<mode>"
5672 [(set (match_operand:VI124_128 0 "register_operand" "")
5673 (if_then_else:VI124_128
5674 (match_operator 3 ""
5675 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
5676 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
5677 (match_operand:VI124_128 1 "general_operand" "")
5678 (match_operand:VI124_128 2 "general_operand" "")))]
5681 bool ok = ix86_expand_int_vcond (operands);
;; V2DI conditional select under the "vcondu" name; same operand layout as
;; the other vcond patterns, expanded through ix86_expand_int_vcond.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5686 (define_expand "vconduv2di"
5687 [(set (match_operand:V2DI 0 "register_operand" "")
5689 (match_operator 3 ""
5690 [(match_operand:V2DI 4 "nonimmediate_operand" "")
5691 (match_operand:V2DI 5 "nonimmediate_operand" "")])
5692 (match_operand:V2DI 1 "general_operand" "")
5693 (match_operand:V2DI 2 "general_operand" "")))]
5696 bool ok = ix86_expand_int_vcond (operands);
5701 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5703 ;; Parallel bitwise logical operations
5705 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement of an integer vector, implemented as an XOR.
;; The preparation code builds a CONST_VECTOR with GET_MODE_NUNITS elements,
;; each constm1_rtx (all bits set), forces it into a register and stores it
;; in operands[2].
;; NOTE(review): the second XOR operand in the template is not visible here;
;; presumably it refers to operands[2] -- confirm against the full file.
5707 (define_expand "one_cmpl<mode>2"
5708 [(set (match_operand:VI 0 "register_operand" "")
5709 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
5713 int i, n = GET_MODE_NUNITS (<MODE>mode);
5714 rtvec v = rtvec_alloc (n);
5716 for (i = 0; i < n; ++i)
5717 RTVEC_ELT (v, i) = constm1_rtx;
5719 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for the VI_128 and-not operation: operand 1 (a register)
;; is complemented and combined with operand 2 (register or memory).
;; NOTE(review): the combining rtx code and the enabling condition are not
;; visible in this excerpt.
5722 (define_expand "sse2_andnot<mode>3"
5723 [(set (match_operand:VI_128 0 "register_operand" "")
5725 (not:VI_128 (match_operand:VI_128 1 "register_operand" ""))
5726 (match_operand:VI_128 2 "nonimmediate_operand" "")))]
;; And-not for all integer vector modes (VI): operand 1 is complemented and
;; combined with operand 2.
;;
;; The mnemonic is chosen from the insn's "mode" attribute: MODE_TI selects
;; the integer form "pandn", anything else the float-domain "andnps".
;; Alternative 0 prints the two-operand legacy form, alternative 1 prepends
;; "v" and prints the three-operand VEX form.  The text is assembled into a
;; static 32-byte buffer with snprintf.
;;
;; The "mode" attribute must select V8SF for vectors wider than 16 bytes so
;; that 256-bit operands get "vandnps" rather than "vpandn".  GET_MODE_SIZE
;; is measured in bytes, hence the "> 16" threshold.  Narrower vectors use
;; TI when SSE2 is available and V4SF otherwise.
5729 (define_insn "*andnot<mode>3"
5730 [(set (match_operand:VI 0 "register_operand" "=x,x")
5732 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
5733 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
5736 static char buf[32];
5739 = (get_attr_mode (insn) == MODE_TI) ? "pandn" : "andnps";
5741 switch (which_alternative)
5744 ops = "%s\t{%%2, %%0|%%0, %%2}";
5747 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
5753 snprintf (buf, sizeof (buf), ops, tmp);
5756 [(set_attr "isa" "noavx,avx")
5757 (set_attr "type" "sselog")
5758 (set (attr "prefix_data16")
5760 (and (eq_attr "alternative" "0")
5761 (eq_attr "mode" "TI"))
5763 (const_string "*")))
5764 (set_attr "prefix" "orig,vex")
5766 (cond [(ne (symbol_ref "GET_MODE_SIZE (<MODE>mode) > 16") (const_int 0))
5767 (const_string "V8SF")
5768 (ne (symbol_ref "TARGET_SSE2") (const_int 0))
5771 (const_string "V4SF")))])
;; Named expander for the iterated binary operation <code> on all integer
;; vector modes (VI).  It only normalizes the operands with
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the rtx operator line and the enabling condition are not
;; visible in this excerpt.
5773 (define_expand "<code><mode>3"
5774 [(set (match_operand:VI 0 "register_operand" "")
5776 (match_operand:VI 1 "nonimmediate_operand" "")
5777 (match_operand:VI 2 "nonimmediate_operand" "")))]
5779 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Iterated bitwise logical operation <code> on all integer vector modes
;; (VI); operands 1 and 2 are commutative ("%").
;;
;; The mnemonic is chosen from the insn's "mode" attribute: MODE_TI selects
;; the integer form "p<logic>", anything else the float-domain "<logic>ps".
;; Alternative 0 prints the two-operand legacy form, alternative 1 prepends
;; "v" and prints the three-operand VEX form.  The text is assembled into a
;; static 32-byte buffer with snprintf.
;;
;; The "mode" attribute must select V8SF for vectors wider than 16 bytes so
;; that 256-bit operands get the "v<logic>ps" form.  GET_MODE_SIZE is
;; measured in bytes, hence the "> 16" threshold.  Narrower vectors use TI
;; when SSE2 is available and V4SF otherwise.
5781 (define_insn "*<code><mode>3"
5782 [(set (match_operand:VI 0 "register_operand" "=x,x")
5784 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
5785 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
5787 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5789 static char buf[32];
5792 = (get_attr_mode (insn) == MODE_TI) ? "p<logic>" : "<logic>ps";
5794 switch (which_alternative)
5797 ops = "%s\t{%%2, %%0|%%0, %%2}";
5800 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
5806 snprintf (buf, sizeof (buf), ops, tmp);
5809 [(set_attr "isa" "noavx,avx")
5810 (set_attr "type" "sselog")
5811 (set (attr "prefix_data16")
5813 (and (eq_attr "alternative" "0")
5814 (eq_attr "mode" "TI"))
5816 (const_string "*")))
5817 (set_attr "prefix" "orig,vex")
5819 (cond [(ne (symbol_ref "GET_MODE_SIZE (<MODE>mode) > 16") (const_int 0))
5820 (const_string "V8SF")
5821 (ne (symbol_ref "TARGET_SSE2") (const_int 0))
5824 (const_string "V4SF")))])
;; And-not on a TFmode value held in an SSE register: operand 1 is
;; complemented and combined with operand 2.  Emitted as pandn (operand 1
;; tied to the destination) or the three-operand vpandn.
;; NOTE(review): the combining rtx code and the enabling condition are not
;; visible in this excerpt.
5826 (define_insn "*andnottf3"
5827 [(set (match_operand:TF 0 "register_operand" "=x,x")
5829 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
5830 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
5833 pandn\t{%2, %0|%0, %2}
5834 vpandn\t{%2, %1, %0|%0, %1, %2}"
5835 [(set_attr "isa" "noavx,avx")
5836 (set_attr "type" "sselog")
5837 (set_attr "prefix_data16" "1,*")
5838 (set_attr "prefix" "orig,vex")
5839 (set_attr "mode" "TI")])
;; Named expander for the iterated binary operation <code> on TFmode.
;; It only normalizes the operands with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the rtx operator line and the enabling condition are not
;; visible in this excerpt.
5841 (define_expand "<code>tf3"
5842 [(set (match_operand:TF 0 "register_operand" "")
5844 (match_operand:TF 1 "nonimmediate_operand" "")
5845 (match_operand:TF 2 "nonimmediate_operand" "")))]
5847 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Iterated bitwise logical operation <code> on TFmode values in SSE
;; registers.  Emitted as p<logic> (operand 1 tied to the destination) or
;; the three-operand vp<logic>; operands 1 and 2 are commutative ("%").
;; NOTE(review): the first part of the enabling condition is not visible.
5849 (define_insn "*<code>tf3"
5850 [(set (match_operand:TF 0 "register_operand" "=x,x")
5852 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
5853 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
5855 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
5857 p<logic>\t{%2, %0|%0, %2}
5858 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
5859 [(set_attr "isa" "noavx,avx")
5860 (set_attr "type" "sselog")
5861 (set_attr "prefix_data16" "1,*")
5862 (set_attr "prefix" "orig,vex")
5863 (set_attr "mode" "TI")])
5865 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5867 ;; Parallel integral element swizzling
5869 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pack two VI248_128 vectors into one vector of mode <ssepackmode>.
;; Both inputs are reinterpreted in the result mode with gen_lowpart and the
;; work is delegated to ix86_expand_vec_extract_even_odd.
;; NOTE(review): the final argument 0 presumably selects the even-numbered
;; elements (i.e. the low half of every wide element) -- confirm against
;; the helper's definition.
5871 (define_expand "vec_pack_trunc_<mode>"
5872 [(match_operand:<ssepackmode> 0 "register_operand" "")
5873 (match_operand:VI248_128 1 "register_operand" "")
5874 (match_operand:VI248_128 2 "register_operand" "")]
5877 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
5878 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
5879 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack two V8HI vectors into one V16QI vector with packsswb (operand 1 tied
;; to the destination) or the three-operand vpacksswb.
;; NOTE(review): the rtx codes wrapping each input (the narrowing operation)
;; and the enabling condition are not visible in this excerpt.
5883 (define_insn "sse2_packsswb"
5884 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5887 (match_operand:V8HI 1 "register_operand" "0,x"))
5889 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))))]
5892 packsswb\t{%2, %0|%0, %2}
5893 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
5894 [(set_attr "isa" "noavx,avx")
5895 (set_attr "type" "sselog")
5896 (set_attr "prefix_data16" "1,*")
5897 (set_attr "prefix" "orig,vex")
5898 (set_attr "mode" "TI")])
;; Pack two V4SI vectors into one V8HI vector with packssdw (operand 1 tied
;; to the destination) or the three-operand vpackssdw.
;; NOTE(review): the rtx codes wrapping each input (the narrowing operation)
;; and the enabling condition are not visible in this excerpt.
5900 (define_insn "sse2_packssdw"
5901 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5904 (match_operand:V4SI 1 "register_operand" "0,x"))
5906 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
5909 packssdw\t{%2, %0|%0, %2}
5910 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
5911 [(set_attr "isa" "noavx,avx")
5912 (set_attr "type" "sselog")
5913 (set_attr "prefix_data16" "1,*")
5914 (set_attr "prefix" "orig,vex")
5915 (set_attr "mode" "TI")])
;; Pack two V8HI vectors into one V16QI vector with packuswb (operand 1 tied
;; to the destination) or the three-operand vpackuswb.
;; NOTE(review): the rtx codes wrapping each input (the narrowing operation)
;; and the enabling condition are not visible in this excerpt.
5917 (define_insn "sse2_packuswb"
5918 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5921 (match_operand:V8HI 1 "register_operand" "0,x"))
5923 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))))]
5926 packuswb\t{%2, %0|%0, %2}
5927 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
5928 [(set_attr "isa" "noavx,avx")
5929 (set_attr "type" "sselog")
5930 (set_attr "prefix_data16" "1,*")
5931 (set_attr "prefix" "orig,vex")
5932 (set_attr "mode" "TI")])
;; Interleave the high halves of two V16QI vectors (punpckhbw / vpunpckhbw).
;; The selection list alternates index i with index i+16 for i = 8..15.
;; NOTE(review): the lines wrapping the operands are not visible here; the
;; indices presumably address the 32-element concatenation of operands 1
;; and 2, so i picks from operand 1 and i+16 from operand 2.
5934 (define_insn "vec_interleave_highv16qi"
5935 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5938 (match_operand:V16QI 1 "register_operand" "0,x")
5939 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
5940 (parallel [(const_int 8) (const_int 24)
5941 (const_int 9) (const_int 25)
5942 (const_int 10) (const_int 26)
5943 (const_int 11) (const_int 27)
5944 (const_int 12) (const_int 28)
5945 (const_int 13) (const_int 29)
5946 (const_int 14) (const_int 30)
5947 (const_int 15) (const_int 31)])))]
5950 punpckhbw\t{%2, %0|%0, %2}
5951 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
5952 [(set_attr "isa" "noavx,avx")
5953 (set_attr "type" "sselog")
5954 (set_attr "prefix_data16" "1,*")
5955 (set_attr "prefix" "orig,vex")
5956 (set_attr "mode" "TI")])
;; Interleave the low halves of two V16QI vectors (punpcklbw / vpunpcklbw).
;; The selection list alternates index i with index i+16 for i = 0..7.
;; NOTE(review): the lines wrapping the operands are not visible here; the
;; indices presumably address the 32-element concatenation of operands 1
;; and 2, so i picks from operand 1 and i+16 from operand 2.
5958 (define_insn "vec_interleave_lowv16qi"
5959 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5962 (match_operand:V16QI 1 "register_operand" "0,x")
5963 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
5964 (parallel [(const_int 0) (const_int 16)
5965 (const_int 1) (const_int 17)
5966 (const_int 2) (const_int 18)
5967 (const_int 3) (const_int 19)
5968 (const_int 4) (const_int 20)
5969 (const_int 5) (const_int 21)
5970 (const_int 6) (const_int 22)
5971 (const_int 7) (const_int 23)])))]
5974 punpcklbw\t{%2, %0|%0, %2}
5975 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
5976 [(set_attr "isa" "noavx,avx")
5977 (set_attr "type" "sselog")
5978 (set_attr "prefix_data16" "1,*")
5979 (set_attr "prefix" "orig,vex")
5980 (set_attr "mode" "TI")])
;; Interleave the high halves of two V8HI vectors (punpckhwd / vpunpckhwd).
;; The selection list alternates index i with index i+8 for i = 4..7.
;; NOTE(review): the lines wrapping the operands are not visible here; the
;; indices presumably address the 16-element concatenation of operands 1
;; and 2.
5982 (define_insn "vec_interleave_highv8hi"
5983 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5986 (match_operand:V8HI 1 "register_operand" "0,x")
5987 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
5988 (parallel [(const_int 4) (const_int 12)
5989 (const_int 5) (const_int 13)
5990 (const_int 6) (const_int 14)
5991 (const_int 7) (const_int 15)])))]
5994 punpckhwd\t{%2, %0|%0, %2}
5995 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
5996 [(set_attr "isa" "noavx,avx")
5997 (set_attr "type" "sselog")
5998 (set_attr "prefix_data16" "1,*")
5999 (set_attr "prefix" "orig,vex")
6000 (set_attr "mode" "TI")])
;; Interleave the low halves of two V8HI vectors (punpcklwd / vpunpcklwd).
;; The selection list alternates index i with index i+8 for i = 0..3.
;; NOTE(review): the lines wrapping the operands are not visible here; the
;; indices presumably address the 16-element concatenation of operands 1
;; and 2.
6002 (define_insn "vec_interleave_lowv8hi"
6003 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6006 (match_operand:V8HI 1 "register_operand" "0,x")
6007 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6008 (parallel [(const_int 0) (const_int 8)
6009 (const_int 1) (const_int 9)
6010 (const_int 2) (const_int 10)
6011 (const_int 3) (const_int 11)])))]
6014 punpcklwd\t{%2, %0|%0, %2}
6015 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6016 [(set_attr "isa" "noavx,avx")
6017 (set_attr "type" "sselog")
6018 (set_attr "prefix_data16" "1,*")
6019 (set_attr "prefix" "orig,vex")
6020 (set_attr "mode" "TI")])
;; Interleave the high halves of two V4SI vectors (punpckhdq / vpunpckhdq).
;; The selection list is 2, 6, 3, 7.
;; NOTE(review): the lines wrapping the operands are not visible here; the
;; indices presumably address the 8-element concatenation of operands 1
;; and 2.
6022 (define_insn "vec_interleave_highv4si"
6023 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6026 (match_operand:V4SI 1 "register_operand" "0,x")
6027 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6028 (parallel [(const_int 2) (const_int 6)
6029 (const_int 3) (const_int 7)])))]
6032 punpckhdq\t{%2, %0|%0, %2}
6033 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6034 [(set_attr "isa" "noavx,avx")
6035 (set_attr "type" "sselog")
6036 (set_attr "prefix_data16" "1,*")
6037 (set_attr "prefix" "orig,vex")
6038 (set_attr "mode" "TI")])
;; Interleave the low halves of two V4SI vectors (punpckldq / vpunpckldq).
;; The selection list is 0, 4, 1, 5.
;; NOTE(review): the lines wrapping the operands are not visible here; the
;; indices presumably address the 8-element concatenation of operands 1
;; and 2.
6040 (define_insn "vec_interleave_lowv4si"
6041 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6044 (match_operand:V4SI 1 "register_operand" "0,x")
6045 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6046 (parallel [(const_int 0) (const_int 4)
6047 (const_int 1) (const_int 5)])))]
6050 punpckldq\t{%2, %0|%0, %2}
6051 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6052 [(set_attr "isa" "noavx,avx")
6053 (set_attr "type" "sselog")
6054 (set_attr "prefix_data16" "1,*")
6055 (set_attr "prefix" "orig,vex")
6056 (set_attr "mode" "TI")])
;; Insert a QImode value (operand 2, register or memory) into one element
;; of a V16QI vector (operand 1).  Operand 3 is a single-bit mask
;; (predicate const_pow2_1_to_32768_operand); before printing it is
;; replaced by its exact_log2, i.e. the element index used as immediate.
;; Four alternatives: legacy pinsrb and VEX vpinsrb, each with a register
;; source (printed with the %k modifier) or a memory source.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6058 (define_insn "sse4_1_pinsrb"
6059 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
6061 (vec_duplicate:V16QI
6062 (match_operand:QI 2 "nonimmediate_operand" "r,m,r,m"))
6063 (match_operand:V16QI 1 "register_operand" "0,0,x,x")
6064 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n,n,n,n")))]
6067 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6069 switch (which_alternative)
6072 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6074 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
6076 return "vpinsrb\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6078 return "vpinsrb\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6083 [(set_attr "isa" "noavx,noavx,avx,avx")
6084 (set_attr "type" "sselog")
6085 (set_attr "prefix_extra" "1")
6086 (set_attr "length_immediate" "1")
6087 (set_attr "prefix" "orig,orig,vex,vex")
6088 (set_attr "mode" "TI")])
;; Insert an HImode value (operand 2, register or memory) into one element
;; of a V8HI vector (operand 1).  Operand 3 is a single-bit mask
;; (predicate const_pow2_1_to_128_operand); before printing it is replaced
;; by its exact_log2, i.e. the element index used as immediate.
;; Four alternatives: legacy pinsrw and VEX vpinsrw, each with a register
;; source (printed with the %k modifier) or a memory source.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6090 (define_insn "sse2_pinsrw"
6091 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
6094 (match_operand:HI 2 "nonimmediate_operand" "r,m,r,m"))
6095 (match_operand:V8HI 1 "register_operand" "0,0,x,x")
6096 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n,n,n,n")))]
6099 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6101 switch (which_alternative)
6104 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6106 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
6108 return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6110 return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6115 [(set_attr "isa" "noavx,noavx,avx,avx")
6116 (set_attr "type" "sselog")
6117 (set_attr "prefix_data16" "1,1,*,*")
6118 (set_attr "prefix_extra" "*,*,1,1")
6119 (set_attr "length_immediate" "1")
6120 (set_attr "prefix" "orig,orig,vex,vex")
6121 (set_attr "mode" "TI")])
6123 ;; It must come before sse2_loadld since it is preferred.
;; Insert an SImode value (operand 2, register or memory) into one element
;; of a V4SI vector (operand 1).  Operand 3 is a single-bit mask
;; (predicate const_pow2_1_to_8_operand) that is converted to the element
;; index with exact_log2 before printing.  Alternative 0 prints pinsrd,
;; alternative 1 the three-operand vpinsrd.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6124 (define_insn "sse4_1_pinsrd"
6125 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6128 (match_operand:SI 2 "nonimmediate_operand" "rm,rm"))
6129 (match_operand:V4SI 1 "register_operand" "0,x")
6130 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n,n")))]
6133 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6135 switch (which_alternative)
6138 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6140 return "vpinsrd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6145 [(set_attr "isa" "noavx,avx")
6146 (set_attr "type" "sselog")
6147 (set_attr "prefix_extra" "1")
6148 (set_attr "length_immediate" "1")
6149 (set_attr "prefix" "orig,vex")
6150 (set_attr "mode" "TI")])
;; Insert a DImode value (operand 2, register or memory) into one element
;; of a V2DI vector (operand 1).  Requires SSE4.1 and a 64-bit target.
;; Operand 3 is a single-bit mask (predicate const_pow2_1_to_2_operand)
;; that is converted to the element index with exact_log2 before printing.
;; Alternative 0 prints pinsrq, alternative 1 the three-operand vpinsrq.
6152 (define_insn "sse4_1_pinsrq"
6153 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6156 (match_operand:DI 2 "nonimmediate_operand" "rm,rm"))
6157 (match_operand:V2DI 1 "register_operand" "0,x")
6158 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n,n")))]
6159 "TARGET_SSE4_1 && TARGET_64BIT"
6161 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6163 switch (which_alternative)
6166 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
6168 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6173 [(set_attr "isa" "noavx,avx")
6174 (set_attr "type" "sselog")
6175 (set_attr "prefix_rex" "1,*")
6176 (set_attr "prefix_extra" "1")
6177 (set_attr "length_immediate" "1")
6178 (set_attr "prefix" "orig,vex")
6179 (set_attr "mode" "TI")])
;; Extract byte element operand 2 (0..15) of a V16QI register into a
;; general register of mode SWI48, printed via %k0, using [v]pextrb.
;; NOTE(review): the extension rtx wrapping the selection and the enabling
;; condition are not visible in this excerpt.
6181 (define_insn "*sse4_1_pextrb_<mode>"
6182 [(set (match_operand:SWI48 0 "register_operand" "=r")
6185 (match_operand:V16QI 1 "register_operand" "x")
6186 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6188 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
6189 [(set_attr "type" "sselog")
6190 (set_attr "prefix_extra" "1")
6191 (set_attr "length_immediate" "1")
6192 (set_attr "prefix" "maybe_vex")
6193 (set_attr "mode" "TI")])
;; Store byte element operand 2 (0..15) of a V16QI register directly to a
;; QImode memory location using [v]pextrb.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6195 (define_insn "*sse4_1_pextrb_memory"
6196 [(set (match_operand:QI 0 "memory_operand" "=m")
6198 (match_operand:V16QI 1 "register_operand" "x")
6199 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6201 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6202 [(set_attr "type" "sselog")
6203 (set_attr "prefix_extra" "1")
6204 (set_attr "length_immediate" "1")
6205 (set_attr "prefix" "maybe_vex")
6206 (set_attr "mode" "TI")])
;; Extract word element operand 2 (0..7) of a V8HI register into a general
;; register of mode SWI48, printed via %k0, using [v]pextrw.
;; NOTE(review): the extension rtx wrapping the selection and the enabling
;; condition are not visible in this excerpt.
6208 (define_insn "*sse2_pextrw_<mode>"
6209 [(set (match_operand:SWI48 0 "register_operand" "=r")
6212 (match_operand:V8HI 1 "register_operand" "x")
6213 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6215 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
6216 [(set_attr "type" "sselog")
6217 (set_attr "prefix_data16" "1")
6218 (set_attr "length_immediate" "1")
6219 (set_attr "prefix" "maybe_vex")
6220 (set_attr "mode" "TI")])
;; Store word element operand 2 (0..7) of a V8HI register directly to an
;; HImode memory location using [v]pextrw.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6222 (define_insn "*sse4_1_pextrw_memory"
6223 [(set (match_operand:HI 0 "memory_operand" "=m")
6225 (match_operand:V8HI 1 "register_operand" "x")
6226 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6228 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6229 [(set_attr "type" "sselog")
6230 (set_attr "prefix_extra" "1")
6231 (set_attr "length_immediate" "1")
6232 (set_attr "prefix" "maybe_vex")
6233 (set_attr "mode" "TI")])
;; Extract dword element operand 2 (0..3) of a V4SI register into an SImode
;; general register or memory location using [v]pextrd.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6235 (define_insn "*sse4_1_pextrd"
6236 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6238 (match_operand:V4SI 1 "register_operand" "x")
6239 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6241 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6242 [(set_attr "type" "sselog")
6243 (set_attr "prefix_extra" "1")
6244 (set_attr "length_immediate" "1")
6245 (set_attr "prefix" "maybe_vex")
6246 (set_attr "mode" "TI")])
;; 64-bit-only variant of the dword extract: the destination is a DImode
;; general register, but the instruction is printed with the %k0 form of
;; that register.
;; NOTE(review): the rtx wrapping the selection is not visible here; the
;; pattern name suggests a zero extension -- confirm against the full file.
6248 (define_insn "*sse4_1_pextrd_zext"
6249 [(set (match_operand:DI 0 "register_operand" "=r")
6252 (match_operand:V4SI 1 "register_operand" "x")
6253 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
6254 "TARGET_64BIT && TARGET_SSE4_1"
6255 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
6256 [(set_attr "type" "sselog")
6257 (set_attr "prefix_extra" "1")
6258 (set_attr "length_immediate" "1")
6259 (set_attr "prefix" "maybe_vex")
6260 (set_attr "mode" "TI")])
6262 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Extract qword element operand 2 (0..1) of a V2DI register into a DImode
;; general register or memory location using [v]pextrq.  Requires SSE4.1
;; and a 64-bit target.
6263 (define_insn "*sse4_1_pextrq"
6264 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6266 (match_operand:V2DI 1 "register_operand" "x")
6267 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6268 "TARGET_SSE4_1 && TARGET_64BIT"
6269 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
6270 [(set_attr "type" "sselog")
6271 (set_attr "prefix_rex" "1")
6272 (set_attr "prefix_extra" "1")
6273 (set_attr "length_immediate" "1")
6274 (set_attr "prefix" "maybe_vex")
6275 (set_attr "mode" "TI")])
;; Expander taking the 8-bit shuffle immediate as a single const_int
;; (operand 2).  It splits the immediate into four 2-bit selectors (bits
;; 0-1, 2-3, 4-5, 6-7) and emits sse2_pshufd_1 with one operand per
;; selector.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6277 (define_expand "sse2_pshufd"
6278 [(match_operand:V4SI 0 "register_operand" "")
6279 (match_operand:V4SI 1 "nonimmediate_operand" "")
6280 (match_operand:SI 2 "const_int_operand" "")]
6283 int mask = INTVAL (operands[2]);
6284 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
6285 GEN_INT ((mask >> 0) & 3),
6286 GEN_INT ((mask >> 2) & 3),
6287 GEN_INT ((mask >> 4) & 3),
6288 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with one selector operand (each 0..3) per result element.
;; At output time the four selectors are packed back into a single
;; immediate -- selector k goes to bits 2k..2k+1 -- which replaces
;; operands[2] and is printed as the immediate of [v]pshufd.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6292 (define_insn "sse2_pshufd_1"
6293 [(set (match_operand:V4SI 0 "register_operand" "=x")
6295 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
6296 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6297 (match_operand 3 "const_0_to_3_operand" "")
6298 (match_operand 4 "const_0_to_3_operand" "")
6299 (match_operand 5 "const_0_to_3_operand" "")])))]
6303 mask |= INTVAL (operands[2]) << 0;
6304 mask |= INTVAL (operands[3]) << 2;
6305 mask |= INTVAL (operands[4]) << 4;
6306 mask |= INTVAL (operands[5]) << 6;
6307 operands[2] = GEN_INT (mask);
6309 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
6311 [(set_attr "type" "sselog1")
6312 (set_attr "prefix_data16" "1")
6313 (set_attr "prefix" "maybe_vex")
6314 (set_attr "length_immediate" "1")
6315 (set_attr "mode" "TI")])
;; Expander taking the 8-bit shuffle immediate as a single const_int
;; (operand 2).  It splits the immediate into four 2-bit selectors and
;; emits sse2_pshuflw_1 with one operand per selector.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6317 (define_expand "sse2_pshuflw"
6318 [(match_operand:V8HI 0 "register_operand" "")
6319 (match_operand:V8HI 1 "nonimmediate_operand" "")
6320 (match_operand:SI 2 "const_int_operand" "")]
6323 int mask = INTVAL (operands[2]);
6324 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
6325 GEN_INT ((mask >> 0) & 3),
6326 GEN_INT ((mask >> 2) & 3),
6327 GEN_INT ((mask >> 4) & 3),
6328 GEN_INT ((mask >> 6) & 3)));
;; V8HI shuffle whose first four result elements are chosen by selector
;; operands 2..5 (each 0..3).  At output time the selectors are packed into
;; one immediate (selector k at bits 2k..2k+1) that replaces operands[2]
;; and is printed as the immediate of [v]pshuflw.
;; NOTE(review): the remainder of the selection list and the enabling
;; condition are not visible in this excerpt.
6332 (define_insn "sse2_pshuflw_1"
6333 [(set (match_operand:V8HI 0 "register_operand" "=x")
6335 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6336 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6337 (match_operand 3 "const_0_to_3_operand" "")
6338 (match_operand 4 "const_0_to_3_operand" "")
6339 (match_operand 5 "const_0_to_3_operand" "")
6347 mask |= INTVAL (operands[2]) << 0;
6348 mask |= INTVAL (operands[3]) << 2;
6349 mask |= INTVAL (operands[4]) << 4;
6350 mask |= INTVAL (operands[5]) << 6;
6351 operands[2] = GEN_INT (mask);
6353 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
6355 [(set_attr "type" "sselog")
6356 (set_attr "prefix_data16" "0")
6357 (set_attr "prefix_rep" "1")
6358 (set_attr "prefix" "maybe_vex")
6359 (set_attr "length_immediate" "1")
6360 (set_attr "mode" "TI")])
;; Expander taking the 8-bit shuffle immediate as a single const_int
;; (operand 2).  Each 2-bit selector is biased by +4, giving element
;; indices 4..7 as required by the const_4_to_7_operand predicates of
;; sse2_pshufhw_1, which is then emitted.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6362 (define_expand "sse2_pshufhw"
6363 [(match_operand:V8HI 0 "register_operand" "")
6364 (match_operand:V8HI 1 "nonimmediate_operand" "")
6365 (match_operand:SI 2 "const_int_operand" "")]
6368 int mask = INTVAL (operands[2]);
6369 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
6370 GEN_INT (((mask >> 0) & 3) + 4),
6371 GEN_INT (((mask >> 2) & 3) + 4),
6372 GEN_INT (((mask >> 4) & 3) + 4),
6373 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI shuffle whose last four result elements are chosen by selector
;; operands 2..5 (each 4..7).  At output time 4 is subtracted from every
;; selector and the results are packed into one immediate (selector k at
;; bits 2k..2k+1) that replaces operands[2] and is printed as the immediate
;; of [v]pshufhw.
;; NOTE(review): part of the selection list and the enabling condition are
;; not visible in this excerpt.
6377 (define_insn "sse2_pshufhw_1"
6378 [(set (match_operand:V8HI 0 "register_operand" "=x")
6380 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6381 (parallel [(const_int 0)
6385 (match_operand 2 "const_4_to_7_operand" "")
6386 (match_operand 3 "const_4_to_7_operand" "")
6387 (match_operand 4 "const_4_to_7_operand" "")
6388 (match_operand 5 "const_4_to_7_operand" "")])))]
6392 mask |= (INTVAL (operands[2]) - 4) << 0;
6393 mask |= (INTVAL (operands[3]) - 4) << 2;
6394 mask |= (INTVAL (operands[4]) - 4) << 4;
6395 mask |= (INTVAL (operands[5]) - 4) << 6;
6396 operands[2] = GEN_INT (mask);
6398 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
6400 [(set_attr "type" "sselog")
6401 (set_attr "prefix_rep" "1")
6402 (set_attr "prefix_data16" "0")
6403 (set_attr "prefix" "maybe_vex")
6404 (set_attr "length_immediate" "1")
6405 (set_attr "mode" "TI")])
;; Expander that loads an SImode value (operand 1) into a V4SI register.
;; The preparation statement sets operands[2] to the V4SI zero vector.
;; NOTE(review): most of the template and the enabling condition are not
;; visible here; operands[2] presumably supplies the remaining elements.
6407 (define_expand "sse2_loadd"
6408 [(set (match_operand:V4SI 0 "register_operand" "")
6411 (match_operand:SI 1 "nonimmediate_operand" ""))
6415 "operands[2] = CONST0_RTX (V4SImode);")
;; Place an SImode value (operand 2) into a V4SI register whose other
;; contents come from operand 1.  Five alternatives:
;;   0,1: [v]movd from memory / general register, operand 1 constraint C
;;   2  : movss from memory, operand 1 constraint C
;;   3  : movss from an SSE register, operand 1 tied to the destination
;;   4  : three-operand vmovss, operand 1 any SSE register
;; NOTE(review): constraint C presumably denotes the zero-vector constant,
;; and the enabling condition is not visible in this excerpt.
6417 (define_insn "sse2_loadld"
6418 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x,x")
6421 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
6422 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
6426 %vmovd\t{%2, %0|%0, %2}
6427 %vmovd\t{%2, %0|%0, %2}
6428 movss\t{%2, %0|%0, %2}
6429 movss\t{%2, %0|%0, %2}
6430 vmovss\t{%2, %1, %0|%0, %1, %2}"
6431 [(set_attr "isa" "base,base,noavx,noavx,avx")
6432 (set_attr "type" "ssemov")
6433 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
6434 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extract element 0 of a V4SI register into an SImode destination.
;; After reload the pattern is split into a plain SImode move when
;; inter-unit moves are allowed, or the destination is memory, or the
;; destination is not a general register.  The split rewrites operand 1 as
;; the same hard register viewed in SImode.
;; NOTE(review): the insn condition and output template are not visible in
;; this excerpt.
6436 (define_insn_and_split "sse2_stored"
6437 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
6439 (match_operand:V4SI 1 "register_operand" "x,Yi")
6440 (parallel [(const_int 0)])))]
6443 "&& reload_completed
6444 && (TARGET_INTER_UNIT_MOVES
6445 || MEM_P (operands [0])
6446 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6447 [(set (match_dup 0) (match_dup 1))]
6448 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element operand 2 (0..3) from a V4SI value that lives in
;; offsettable memory.  The split turns it into an ordinary SImode load
;; from the same address advanced by 4 bytes per element index.
;; NOTE(review): the insn and split conditions are not visible here.
6450 (define_insn_and_split "*vec_ext_v4si_mem"
6451 [(set (match_operand:SI 0 "register_operand" "=r")
6453 (match_operand:V4SI 1 "memory_operand" "o")
6454 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
6460 int i = INTVAL (operands[2]);
6462 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Named expander for extracting element 0 of a V2DI register into a DImode
;; register or memory destination.  No preparation code is visible.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6466 (define_expand "sse_storeq"
6467 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6469 (match_operand:V2DI 1 "register_operand" "")
6470 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 extract from V2DI.  Source and
;; destination may not both be memory.  Three alternatives are declared;
;; only the third one's template is visible here: a plain mov{q} from
;; offsettable memory into a general register (type imov, mode DI).
;; NOTE(review): the templates of the first two alternatives are not
;; visible in this excerpt.
6473 (define_insn "*sse2_storeq_rex64"
6474 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
6476 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
6477 (parallel [(const_int 0)])))]
6478 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6482 mov{q}\t{%1, %0|%0, %1}"
6483 [(set_attr "type" "*,*,imov")
6484 (set_attr "mode" "*,*,DI")])
;; Element-0 extract from a V2DI SSE register into an SSE register or
;; memory DImode destination.
;; NOTE(review): the enabling condition and output template are not visible
;; in this excerpt.
6486 (define_insn "*sse2_storeq"
6487 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
6489 (match_operand:V2DI 1 "register_operand" "x")
6490 (parallel [(const_int 0)])))]
;; Split for the element-0 extract from V2DI: when inter-unit moves are
;; allowed, or the destination is memory, or it is not a general register,
;; the extract becomes a plain DImode move from the same hard register
;; viewed in DImode.
;; NOTE(review): the opening form of this definition and the first part of
;; its condition are not visible in this excerpt; presumably an anonymous
;; define_split -- confirm against the full file.
6495 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6497 (match_operand:V2DI 1 "register_operand" "")
6498 (parallel [(const_int 0)])))]
6501 && (TARGET_INTER_UNIT_MOVES
6502 || MEM_P (operands [0])
6503 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6504 [(set (match_dup 0) (match_dup 1))]
6505 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; Extract element 1 of a V2DI value on 64-bit targets; source and
;; destination may not both be memory.  Alternatives:
;;   0: [v]movhps store of an SSE register to memory
;;   1: psrldq by 8 bytes, operand 1 tied to the destination
;;   2: three-operand vpsrldq by 8 bytes
;;   3: [v]movq into an SSE register from %H1 of an offsettable memory source
;;   4: mov{q} into a general register from %H1 of the memory source
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of operand 1.
6507 (define_insn "*vec_extractv2di_1_rex64"
6508 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
6510 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
6511 (parallel [(const_int 1)])))]
6512 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6514 %vmovhps\t{%1, %0|%0, %1}
6515 psrldq\t{$8, %0|%0, 8}
6516 vpsrldq\t{$8, %1, %0|%0, %1, 8}
6517 %vmovq\t{%H1, %0|%0, %H1}
6518 mov{q}\t{%H1, %0|%0, %H1}"
6519 [(set_attr "isa" "base,noavx,avx,base,base")
6520 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
6521 (set_attr "length_immediate" "*,1,1,*,*")
6522 (set_attr "memory" "*,none,none,*,*")
6523 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
6524 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
;; Extract element 1 of a V2DI value with SSE2; source and destination may
;; not both be memory.  Same alternatives as the rex64 variant minus the
;; general-register destination: [v]movhps store, psrldq / vpsrldq by 8
;; bytes, and [v]movq from %H1 of an offsettable memory source.
;; NOTE(review): the first part of the condition is not visible here.
6526 (define_insn "*vec_extractv2di_1_sse2"
6527 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x")
6529 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o")
6530 (parallel [(const_int 1)])))]
6532 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6534 %vmovhps\t{%1, %0|%0, %1}
6535 psrldq\t{$8, %0|%0, 8}
6536 vpsrldq\t{$8, %1, %0|%0, %1, 8}
6537 %vmovq\t{%H1, %0|%0, %H1}"
6538 [(set_attr "isa" "base,noavx,avx,base")
6539 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov")
6540 (set_attr "length_immediate" "*,1,1,*")
6541 (set_attr "memory" "*,none,none,*")
6542 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex")
6543 (set_attr "mode" "V2SF,TI,TI,TI")])
6545 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; Extract element 1 of a V2DI value when only SSE (not SSE2) is available;
;; source and destination may not both be memory.  Alternatives:
;;   0: movhps store of an SSE register to memory
;;   1: movhlps between SSE registers
;;   2: movlps into an SSE register from %H1 of an offsettable memory source
6546 (define_insn "*vec_extractv2di_1_sse"
6547 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
6549 (match_operand:V2DI 1 "nonimmediate_operand" " x,x,o")
6550 (parallel [(const_int 1)])))]
6551 "!TARGET_SSE2 && TARGET_SSE
6552 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6554 movhps\t{%1, %0|%0, %1}
6555 movhlps\t{%1, %0|%0, %1}
6556 movlps\t{%H1, %0|%0, %H1}"
6557 [(set_attr "type" "ssemov")
6558 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; VEX-encoded broadcast of an SImode value into a V4SI register:
;; vpshufd with immediate 0 when the source is an SSE register,
;; vbroadcastss when the source is in memory.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6560 (define_insn "*vec_dupv4si_avx"
6561 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6563 (match_operand:SI 1 "nonimmediate_operand" " x,m")))]
6566 vpshufd\t{$0, %1, %0|%0, %1, 0}
6567 vbroadcastss\t{%1, %0|%0, %1}"
6568 [(set_attr "type" "sselog1,ssemov")
6569 (set_attr "length_immediate" "1,0")
6570 (set_attr "prefix_extra" "0,1")
6571 (set_attr "prefix" "vex")
6572 (set_attr "mode" "TI,V4SF")])
;; Broadcast of an SImode register value into a V4SI register:
;; pshufd with immediate 0 (Y2 register classes), or shufps with immediate
;; 0 applied to the destination itself when operand 1 is tied to it.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6574 (define_insn "*vec_dupv4si"
6575 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
6577 (match_operand:SI 1 "register_operand" " Y2,0")))]
6580 pshufd\t{$0, %1, %0|%0, %1, 0}
6581 shufps\t{$0, %0, %0|%0, %0, 0}"
6582 [(set_attr "type" "sselog1")
6583 (set_attr "length_immediate" "1")
6584 (set_attr "mode" "TI,V4SF")])
;; Broadcast of a DImode value into a V2DI register.  Three alternatives
;; are declared (noavx, avx, base); the visible templates are vpunpcklqdq
;; for the AVX register case and [v]movddup for a memory source.
;; NOTE(review): the first alternative's template and the enabling
;; condition are not visible in this excerpt.
6586 (define_insn "*vec_dupv2di_sse3"
6587 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
6589 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m")))]
6593 vpunpcklqdq\t{%d1, %0|%0, %d1}
6594 %vmovddup\t{%1, %0|%0, %1}"
6595 [(set_attr "isa" "noavx,avx,base")
6596 (set_attr "type" "sselog1")
6597 (set_attr "prefix" "orig,vex,maybe_vex")
6598 (set_attr "mode" "TI,TI,DF")])
;; Broadcast of a DImode register value into a V2DI register, with
;; operand 1 tied to the destination in both alternatives (types sselog1
;; and ssemov, modes TI and V4SF).
;; NOTE(review): the enabling condition and output templates are not
;; visible in this excerpt.
6600 (define_insn "*vec_dupv2di"
6601 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
6603 (match_operand:DI 1 "register_operand" " 0 ,0")))]
6608 [(set_attr "type" "sselog1,ssemov")
6609 (set_attr "mode" "TI,V4SF")])
;; Build a V2SI value from two SImode operands.  Seven alternatives:
;;   0,1: pinsrd / vpinsrd inserting operand 2 (reg or mem) at index 1
;;   2,3: punpckldq / vpunpckldq with operand 2 in an SSE register
;;   4  : [v]movd of operand 1 when operand 2 has constraint C
;;   5  : MMX punpckldq
;;   6  : MMX movd of operand 1 when operand 2 has constraint C
;; NOTE(review): constraint C presumably denotes constant zero, and the
;; enabling condition is not visible in this excerpt.
6611 (define_insn "*vec_concatv2si_sse4_1"
6612 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
6614 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
6615 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
6618 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
6619 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
6620 punpckldq\t{%2, %0|%0, %2}
6621 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
6622 %vmovd\t{%1, %0|%0, %1}
6623 punpckldq\t{%2, %0|%0, %2}
6624 movd\t{%1, %0|%0, %1}"
6625 [(set_attr "isa" "noavx,avx,noavx,avx,base,base,base")
6626 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
6627 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
6628 (set_attr "length_immediate" "1,1,*,*,*,*,*")
6629 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
6630 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
6632 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6633 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
6634 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; V2SI concat of two SI operands without pinsrd.  Alternatives 0/1 use
;; SSE registers (punpckldq, or movd when operand 2 matches C);
;; alternatives 2/3 are the same two forms on MMX (*y) registers.
6635 (define_insn "*vec_concatv2si_sse2"
6636 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
6638 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
6639 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
6642 punpckldq\t{%2, %0|%0, %2}
6643 movd\t{%1, %0|%0, %1}
6644 punpckldq\t{%2, %0|%0, %2}
6645 movd\t{%1, %0|%0, %1}"
6646 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6647 (set_attr "mode" "TI,TI,DI,DI")])
;; V2SI concat using single-precision SSE instructions for the SSE
;; alternatives (unpcklps, or movss from memory when operand 2 matches
;; C) and punpckldq / movd for the MMX (*y) alternatives.
6649 (define_insn "*vec_concatv2si_sse"
6650 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
6652 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
6653 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
6656 unpcklps\t{%2, %0|%0, %2}
6657 movss\t{%1, %0|%0, %1}
6658 punpckldq\t{%2, %0|%0, %2}
6659 movd\t{%1, %0|%0, %1}"
6660 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6661 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; VEX-encoded V4SI concat of two V2SI halves: vpunpcklqdq when operand 2
;; is a register, vmovhps when it is in memory.
6663 (define_insn "*vec_concatv4si_1_avx"
6664 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6666 (match_operand:V2SI 1 "register_operand" " x,x")
6667 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
6670 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6671 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6672 [(set_attr "type" "sselog,ssemov")
6673 (set_attr "prefix" "vex")
6674 (set_attr "mode" "TI,V2SF")])
;; Two-operand (non-VEX) V4SI concat of two V2SI halves; operand 1 is
;; tied to the destination.  Emits punpcklqdq, movlhps (register
;; operand 2) or movhps (memory operand 2).
6676 (define_insn "*vec_concatv4si_1"
6677 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
6679 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
6680 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
6683 punpcklqdq\t{%2, %0|%0, %2}
6684 movlhps\t{%2, %0|%0, %2}
6685 movhps\t{%2, %0|%0, %2}"
6686 [(set_attr "type" "sselog,ssemov,ssemov")
6687 (set_attr "mode" "TI,V4SF,V2SF")])
6689 ;; movd instead of movq is required to handle broken assemblers.
;; 64-bit, SSE4.1 V2DI concat of two DI operands.  Nine alternatives:
;;   0/1  pinsrq / vpinsrq inserting operand 2 at index 1,
;;   2    movq from an SSE register or memory (operand 2 matches C),
;;   3    movd from a general register (operand 2 matches C),
;;   4    movq2dq from an MMX register (operand 2 matches C),
;;   5/6  punpcklqdq / vpunpcklqdq of two SSE registers,
;;   7/8  movhps / vmovhps loading operand 2 from memory.
;; prefix_rex is computed specially for alternatives 0 and 3 when AVX
;; is disabled.
6690 (define_insn "*vec_concatv2di_rex64_sse4_1"
6691 [(set (match_operand:V2DI 0 "register_operand"
6692 "=x, x, x,Yi,!x,x,x,x,x")
6694 (match_operand:DI 1 "nonimmediate_operand"
6695 " 0, x,xm,r ,*y,0,x,0,x")
6696 (match_operand:DI 2 "vector_move_operand"
6697 "rm,rm, C,C ,C ,x,x,m,m")))]
6698 "TARGET_64BIT && TARGET_SSE4_1"
6700 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
6701 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
6702 %vmovq\t{%1, %0|%0, %1}
6703 %vmovd\t{%1, %0|%0, %1}
6704 movq2dq\t{%1, %0|%0, %1}
6705 punpcklqdq\t{%2, %0|%0, %2}
6706 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6707 movhps\t{%2, %0|%0, %2}
6708 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6709 [(set_attr "isa" "noavx,avx,base,base,base,noavx,avx,noavx,avx")
6710 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov,sselog,sselog,ssemov,ssemov")
6711 (set (attr "prefix_rex")
6713 (and (eq_attr "alternative" "0,3")
6714 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
6716 (const_string "*")))
6717 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
6718 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
6719 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
6720 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
6722 ;; movd instead of movq is required to handle broken assemblers.
;; 64-bit V2DI concat of two DI operands without pinsrq.  Alternatives
;; 0-2 apply when operand 2 matches C and only move operand 1 (movq,
;; movd from a general register, movq2dq from MMX); alternatives 3-5
;; merge operand 2 into the tied destination (punpcklqdq, movlhps,
;; movhps from memory).  Only the movd alternative sets prefix_rex.
6723 (define_insn "*vec_concatv2di_rex64_sse"
6724 [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x")
6726 (match_operand:DI 1 "nonimmediate_operand" "Y2m,r ,*y ,0 ,0,0")
6727 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
6728 "TARGET_64BIT && TARGET_SSE"
6730 movq\t{%1, %0|%0, %1}
6731 movd\t{%1, %0|%0, %1}
6732 movq2dq\t{%1, %0|%0, %1}
6733 punpcklqdq\t{%2, %0|%0, %2}
6734 movlhps\t{%2, %0|%0, %2}
6735 movhps\t{%2, %0|%0, %2}"
6736 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
6737 (set_attr "prefix_rex" "*,1,*,*,*,*")
6738 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; 32-bit (!TARGET_64BIT) V2DI concat of two DI operands.  Alternatives
;; 0/1 apply when operand 2 matches C and only move operand 1 (movq, or
;; movq2dq from an MMX register); alternatives 2-6 combine two operands
;; with punpcklqdq / vpunpcklqdq, movlhps, or movhps / vmovhps from
;; memory.
6740 (define_insn "vec_concatv2di"
6741 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x,x")
6743 (match_operand:DI 1 "nonimmediate_operand" "Y2m,*y , 0,x,0,0,x")
6744 (match_operand:DI 2 "vector_move_operand" " C , C ,Y2,x,x,m,m")))]
6745 "!TARGET_64BIT && TARGET_SSE"
6747 %vmovq\t{%1, %0|%0, %1}
6748 movq2dq\t{%1, %0|%0, %1}
6749 punpcklqdq\t{%2, %0|%0, %2}
6750 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6751 movlhps\t{%2, %0|%0, %2}
6752 movhps\t{%2, %0|%0, %2}
6753 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6754 [(set_attr "isa" "base,base,noavx,avx,noavx,noavx,avx")
6755 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
6756 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
6757 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Unpack expander for 128-bit integer vectors; all work is delegated to
;; ix86_expand_sse_unpack with flags (false, false).  Judging by the four
;; sibling expanders the flags select unsigned and high-half
;; respectively -- confirm against i386.c.
6759 (define_expand "vec_unpacks_lo_<mode>"
6760 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6761 (match_operand:VI124_128 1 "register_operand" "")]
6763 "ix86_expand_sse_unpack (operands, false, false); DONE;")
;; Unpack expander for 128-bit integer vectors; delegates to
;; ix86_expand_sse_unpack with flags (false, true), presumably
;; signed / high half.
6765 (define_expand "vec_unpacks_hi_<mode>"
6766 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6767 (match_operand:VI124_128 1 "register_operand" "")]
6769 "ix86_expand_sse_unpack (operands, false, true); DONE;")
;; Unpack expander for 128-bit integer vectors; delegates to
;; ix86_expand_sse_unpack with flags (true, false), presumably
;; unsigned / low half.
6771 (define_expand "vec_unpacku_lo_<mode>"
6772 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6773 (match_operand:VI124_128 1 "register_operand" "")]
6775 "ix86_expand_sse_unpack (operands, true, false); DONE;")
;; Unpack expander for 128-bit integer vectors; delegates to
;; ix86_expand_sse_unpack with flags (true, true), presumably
;; unsigned / high half.
6777 (define_expand "vec_unpacku_hi_<mode>"
6778 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6779 (match_operand:VI124_128 1 "register_operand" "")]
6781 "ix86_expand_sse_unpack (operands, true, true); DONE;")
6783 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6787 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI average pattern matched by *sse2_uavgv16qi3.
;; The RTL adds an all-ones V16QI constant; the preparation statement
;; canonicalizes the operands for PLUS via
;; ix86_fixup_binary_operands_no_copy.
6789 (define_expand "sse2_uavgv16qi3"
6790 [(set (match_operand:V16QI 0 "register_operand" "")
6796 (match_operand:V16QI 1 "nonimmediate_operand" ""))
6798 (match_operand:V16QI 2 "nonimmediate_operand" "")))
6799 (const_vector:V16QI [(const_int 1) (const_int 1)
6800 (const_int 1) (const_int 1)
6801 (const_int 1) (const_int 1)
6802 (const_int 1) (const_int 1)
6803 (const_int 1) (const_int 1)
6804 (const_int 1) (const_int 1)
6805 (const_int 1) (const_int 1)
6806 (const_int 1) (const_int 1)]))
6809 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; V16QI average insn, emitted as pavgb (two-operand, destination tied
;; to operand 1) or vpavgb (three-operand VEX form).  Operand 1 is
;; marked commutative ("%"); the insn requires SSE2 and operands
;; accepted by ix86_binary_operator_ok for PLUS.
6811 (define_insn "*sse2_uavgv16qi3"
6812 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6818 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
6820 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
6821 (const_vector:V16QI [(const_int 1) (const_int 1)
6822 (const_int 1) (const_int 1)
6823 (const_int 1) (const_int 1)
6824 (const_int 1) (const_int 1)
6825 (const_int 1) (const_int 1)
6826 (const_int 1) (const_int 1)
6827 (const_int 1) (const_int 1)
6828 (const_int 1) (const_int 1)]))
6830 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
6832 pavgb\t{%2, %0|%0, %2}
6833 vpavgb\t{%2, %1, %0|%0, %1, %2}"
6834 [(set_attr "isa" "noavx,avx")
6835 (set_attr "type" "sseiadd")
6836 (set_attr "prefix_data16" "1,*")
6837 (set_attr "prefix" "orig,vex")
6838 (set_attr "mode" "TI")])
;; Expander for the V8HI average pattern matched by *sse2_uavgv8hi3.
;; The RTL adds an all-ones V8HI constant; the preparation statement
;; canonicalizes the operands for PLUS via
;; ix86_fixup_binary_operands_no_copy.
6840 (define_expand "sse2_uavgv8hi3"
6841 [(set (match_operand:V8HI 0 "register_operand" "")
6847 (match_operand:V8HI 1 "nonimmediate_operand" ""))
6849 (match_operand:V8HI 2 "nonimmediate_operand" "")))
6850 (const_vector:V8HI [(const_int 1) (const_int 1)
6851 (const_int 1) (const_int 1)
6852 (const_int 1) (const_int 1)
6853 (const_int 1) (const_int 1)]))
6856 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; V8HI average insn, emitted as pavgw (two-operand, destination tied to
;; operand 1) or vpavgw (three-operand VEX form).  Operand 1 is marked
;; commutative ("%"); the insn requires SSE2 and operands accepted by
;; ix86_binary_operator_ok for PLUS.
6858 (define_insn "*sse2_uavgv8hi3"
6859 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6865 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
6867 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
6868 (const_vector:V8HI [(const_int 1) (const_int 1)
6869 (const_int 1) (const_int 1)
6870 (const_int 1) (const_int 1)
6871 (const_int 1) (const_int 1)]))
6873 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
6875 pavgw\t{%2, %0|%0, %2}
6876 vpavgw\t{%2, %1, %0|%0, %1, %2}"
6877 [(set_attr "isa" "noavx,avx")
6878 (set_attr "type" "sseiadd")
6879 (set_attr "prefix_data16" "1,*")
6880 (set_attr "prefix" "orig,vex")
6881 (set_attr "mode" "TI")])
6883 ;; The correct representation for this is absolutely enormous, and
6884 ;; surely not generally useful.
;; psadbw / vpsadbw on two V16QI inputs producing a V2DI result.  The
;; operation is modelled as an opaque unspec rather than explicit RTL.
6885 (define_insn "sse2_psadbw"
6886 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6887 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0,x")
6888 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")]
6892 psadbw\t{%2, %0|%0, %2}
6893 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
6894 [(set_attr "isa" "noavx,avx")
6895 (set_attr "type" "sseiadd")
6896 (set_attr "atom_unit" "simul")
6897 (set_attr "prefix_data16" "1,*")
6898 (set_attr "prefix" "orig,vex")
6899 (set_attr "mode" "TI")])
;; movmskps / movmskpd family: one insn per mode of the VF iterator,
;; moving from an SSE register into a 32-bit general register.  The
;; mnemonic suffix comes from <ssemodesuffix>; "%v" adds the VEX "v"
;; prefix when applicable.
6901 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
6902 [(set (match_operand:SI 0 "register_operand" "=r")
6904 [(match_operand:VF 1 "register_operand" "x")]
6907 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
6908 [(set_attr "type" "ssemov")
6909 (set_attr "prefix" "maybe_vex")
6910 (set_attr "mode" "<MODE>")])
;; pmovmskb / vpmovmskb: V16QI SSE register to a 32-bit general
;; register, modelled as an unspec.
6912 (define_insn "sse2_pmovmskb"
6913 [(set (match_operand:SI 0 "register_operand" "=r")
6914 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
6917 "%vpmovmskb\t{%1, %0|%0, %1}"
6918 [(set_attr "type" "ssemov")
6919 (set_attr "prefix_data16" "1")
6920 (set_attr "prefix" "maybe_vex")
6921 (set_attr "mode" "SI")])
;; Named expander for maskmovdqu: a V16QI memory destination set from an
;; unspec of two V16QI register operands.  The matching insn is
;; *sse2_maskmovdqu.
6923 (define_expand "sse2_maskmovdqu"
6924 [(set (match_operand:V16QI 0 "memory_operand" "")
6925 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
6926 (match_operand:V16QI 2 "register_operand" "")
;; maskmovdqu / vmaskmovdqu.  The store address is operand 0, forced
;; into the register selected by constraint "D"; it is not printed in
;; the template because the instruction uses it implicitly.  The old
;; memory contents appear as an unspec input (mem (match_dup 0)).
6931 (define_insn "*sse2_maskmovdqu"
6932 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
6933 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
6934 (match_operand:V16QI 2 "register_operand" "x")
6935 (mem:V16QI (match_dup 0))]
6938 "%vmaskmovdqu\t{%2, %1|%1, %2}"
6939 [(set_attr "type" "ssemov")
6940 (set_attr "prefix_data16" "1")
6941 ;; The implicit %rdi operand confuses default length_vex computation.
6942 (set (attr "length_vex")
6943 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
6944 (set_attr "prefix" "maybe_vex")
6945 (set_attr "mode" "TI")])
;; MXCSR load from a 32-bit memory operand, modelled as unspec_volatile
;; and tagged as a memory load for scheduling.
6947 (define_insn "sse_ldmxcsr"
6948 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
6952 [(set_attr "type" "sse")
6953 (set_attr "atom_sse_attr" "mxcsr")
6954 (set_attr "prefix" "maybe_vex")
6955 (set_attr "memory" "load")])
;; MXCSR store to a 32-bit memory operand (UNSPECV_STMXCSR), tagged as a
;; memory store for scheduling.
6957 (define_insn "sse_stmxcsr"
6958 [(set (match_operand:SI 0 "memory_operand" "=m")
6959 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
6962 [(set_attr "type" "sse")
6963 (set_attr "atom_sse_attr" "mxcsr")
6964 (set_attr "prefix" "maybe_vex")
6965 (set_attr "memory" "store")])
;; sfence expander.  Operand 0 is synthesized in the preparation code as
;; a volatile BLKmode MEM over a scratch address; the fence is expressed
;; as an UNSPEC_SFENCE set of that MEM.
6967 (define_expand "sse_sfence"
6969 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
6970 "TARGET_SSE || TARGET_3DNOW_A"
6972 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
6973 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the UNSPEC_SFENCE set of a BLKmode operand.  No address
;; bytes are encoded (length_address 0) and the memory effect is
;; declared unknown.
6976 (define_insn "*sse_sfence"
6977 [(set (match_operand:BLK 0 "" "")
6978 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
6979 "TARGET_SSE || TARGET_3DNOW_A"
6981 [(set_attr "type" "sse")
6982 (set_attr "length_address" "0")
6983 (set_attr "atom_sse_attr" "fence")
6984 (set_attr "memory" "unknown")])
;; clflush pattern taking an address operand (constraint "p"), modelled
;; as unspec_volatile with unknown memory effect.
6986 (define_insn "sse2_clflush"
6987 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
6991 [(set_attr "type" "sse")
6992 (set_attr "atom_sse_attr" "fence")
6993 (set_attr "memory" "unknown")])
;; mfence expander.  Operand 0 is synthesized in the preparation code as
;; a volatile BLKmode MEM over a scratch address; the fence is expressed
;; as an UNSPEC_MFENCE set of that MEM.
6995 (define_expand "sse2_mfence"
6997 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7000 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7001 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the UNSPEC_MFENCE set of a BLKmode operand; enabled for
;; any 64-bit target or when SSE2 is available.
7004 (define_insn "*sse2_mfence"
7005 [(set (match_operand:BLK 0 "" "")
7006 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7007 "TARGET_64BIT || TARGET_SSE2"
7009 [(set_attr "type" "sse")
7010 (set_attr "length_address" "0")
7011 (set_attr "atom_sse_attr" "fence")
7012 (set_attr "memory" "unknown")])
;; lfence expander.  Operand 0 is synthesized in the preparation code as
;; a volatile BLKmode MEM over a scratch address; the fence is expressed
;; as an UNSPEC_LFENCE set of that MEM.
7014 (define_expand "sse2_lfence"
7016 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7019 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7020 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the UNSPEC_LFENCE set of a BLKmode operand.  Unlike the
;; sfence/mfence insns, atom_sse_attr is "lfence" rather than "fence".
7023 (define_insn "*sse2_lfence"
7024 [(set (match_operand:BLK 0 "" "")
7025 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7028 [(set_attr "type" "sse")
7029 (set_attr "length_address" "0")
7030 (set_attr "atom_sse_attr" "lfence")
7031 (set_attr "memory" "unknown")])
;; mwait: two SImode inputs pinned to the "a" and "c" register classes.
;; The insn length is fixed at 3 bytes.
7033 (define_insn "sse3_mwait"
7034 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7035 (match_operand:SI 1 "register_operand" "c")]
7038 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
7039 ;; Since 32bit register operands are implicitly zero extended to 64bit,
7040 ;; we only need to set up 32bit registers.
7042 [(set_attr "length" "3")])
;; 32-bit monitor: three SImode inputs pinned to the "a", "c" and "d"
;; register classes and printed explicitly.  The 64-bit variant is
;; sse3_monitor64.
7044 (define_insn "sse3_monitor"
7045 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7046 (match_operand:SI 1 "register_operand" "c")
7047 (match_operand:SI 2 "register_operand" "d")]
7049 "TARGET_SSE3 && !TARGET_64BIT"
7050 "monitor\t%0, %1, %2"
7051 [(set_attr "length" "3")])
;; 64-bit monitor: operand 0 is DImode in the "a" class; operands 1 and
;; 2 stay SImode in the "c" and "d" classes.
7053 (define_insn "sse3_monitor64"
7054 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
7055 (match_operand:SI 1 "register_operand" "c")
7056 (match_operand:SI 2 "register_operand" "d")]
7058 "TARGET_SSE3 && TARGET_64BIT"
7059 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
7060 ;; RCX and RDX are used. Since 32bit register operands are implicitly
7061 ;; zero extended to 64bit, we only need to set up 32bit registers.
7063 [(set_attr "length" "3")])
7065 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7067 ;; SSSE3 instructions
7069 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; phaddw / vphaddw on XMM registers (V8HI).  The RTL selects elements
;; 0-7 of operand 1 and then of operand 2 one at a time (vec_select:HI),
;; combined pairwise.
7071 (define_insn "ssse3_phaddwv8hi3"
7072 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7078 (match_operand:V8HI 1 "register_operand" "0,x")
7079 (parallel [(const_int 0)]))
7080 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7082 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7083 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7086 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7087 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7089 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7090 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7095 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7096 (parallel [(const_int 0)]))
7097 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7099 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7100 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7103 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7104 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7106 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7107 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7110 phaddw\t{%2, %0|%0, %2}
7111 vphaddw\t{%2, %1, %0|%0, %1, %2}"
7112 [(set_attr "isa" "noavx,avx")
7113 (set_attr "type" "sseiadd")
7114 (set_attr "atom_unit" "complex")
7115 (set_attr "prefix_data16" "1,*")
7116 (set_attr "prefix_extra" "1")
7117 (set_attr "prefix" "orig,vex")
7118 (set_attr "mode" "TI")])
;; phaddw on MMX registers (V4HI, constraint "y"); operand 1 is tied to
;; the destination.  prefix_rex is computed per insn from whether an
;; extended register is mentioned.
7120 (define_insn "ssse3_phaddwv4hi3"
7121 [(set (match_operand:V4HI 0 "register_operand" "=y")
7126 (match_operand:V4HI 1 "register_operand" "0")
7127 (parallel [(const_int 0)]))
7128 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7130 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7131 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7135 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7136 (parallel [(const_int 0)]))
7137 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7139 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7140 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7142 "phaddw\t{%2, %0|%0, %2}"
7143 [(set_attr "type" "sseiadd")
7144 (set_attr "atom_unit" "complex")
7145 (set_attr "prefix_extra" "1")
7146 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7147 (set_attr "mode" "DI")])
;; phaddd / vphaddd on XMM registers (V4SI).  The RTL selects elements
;; 0-3 of operand 1 and then of operand 2 one at a time (vec_select:SI),
;; combined pairwise.
7149 (define_insn "ssse3_phadddv4si3"
7150 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7155 (match_operand:V4SI 1 "register_operand" "0,x")
7156 (parallel [(const_int 0)]))
7157 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7159 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7160 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7164 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7165 (parallel [(const_int 0)]))
7166 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7168 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7169 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7172 phaddd\t{%2, %0|%0, %2}
7173 vphaddd\t{%2, %1, %0|%0, %1, %2}"
7174 [(set_attr "isa" "noavx,avx")
7175 (set_attr "type" "sseiadd")
7176 (set_attr "atom_unit" "complex")
7177 (set_attr "prefix_data16" "1,*")
7178 (set_attr "prefix_extra" "1")
7179 (set_attr "prefix" "orig,vex")
7180 (set_attr "mode" "TI")])
;; phaddd on MMX registers (V2SI, constraint "y"); operand 1 is tied to
;; the destination.  prefix_rex is computed per insn from whether an
;; extended register is mentioned.
7182 (define_insn "ssse3_phadddv2si3"
7183 [(set (match_operand:V2SI 0 "register_operand" "=y")
7187 (match_operand:V2SI 1 "register_operand" "0")
7188 (parallel [(const_int 0)]))
7189 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7192 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
7193 (parallel [(const_int 0)]))
7194 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
7196 "phaddd\t{%2, %0|%0, %2}"
7197 [(set_attr "type" "sseiadd")
7198 (set_attr "atom_unit" "complex")
7199 (set_attr "prefix_extra" "1")
7200 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7201 (set_attr "mode" "DI")])
;; phaddsw / vphaddsw on XMM registers (V8HI).  Same element-selection
;; shape as ssse3_phaddwv8hi3; only the emitted mnemonic differs in the
;; lines visible here.
7203 (define_insn "ssse3_phaddswv8hi3"
7204 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7210 (match_operand:V8HI 1 "register_operand" "0,x")
7211 (parallel [(const_int 0)]))
7212 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7214 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7215 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7218 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7219 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7221 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7222 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7227 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7228 (parallel [(const_int 0)]))
7229 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7231 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7232 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7235 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7236 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7238 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7239 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7242 phaddsw\t{%2, %0|%0, %2}
7243 vphaddsw\t{%2, %1, %0|%0, %1, %2}"
7244 [(set_attr "isa" "noavx,avx")
7245 (set_attr "type" "sseiadd")
7246 (set_attr "atom_unit" "complex")
7247 (set_attr "prefix_data16" "1,*")
7248 (set_attr "prefix_extra" "1")
7249 (set_attr "prefix" "orig,vex")
7250 (set_attr "mode" "TI")])
;; phaddsw on MMX registers (V4HI, constraint "y"); operand 1 is tied to
;; the destination.
7252 (define_insn "ssse3_phaddswv4hi3"
7253 [(set (match_operand:V4HI 0 "register_operand" "=y")
7258 (match_operand:V4HI 1 "register_operand" "0")
7259 (parallel [(const_int 0)]))
7260 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7262 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7263 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7267 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7268 (parallel [(const_int 0)]))
7269 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7271 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7272 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7274 "phaddsw\t{%2, %0|%0, %2}"
7275 [(set_attr "type" "sseiadd")
7276 (set_attr "atom_unit" "complex")
7277 (set_attr "prefix_extra" "1")
7278 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7279 (set_attr "mode" "DI")])
;; phsubw / vphsubw on XMM registers (V8HI).  The RTL selects elements
;; 0-7 of operand 1 and then of operand 2 one at a time (vec_select:HI),
;; combined pairwise.
7281 (define_insn "ssse3_phsubwv8hi3"
7282 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7288 (match_operand:V8HI 1 "register_operand" "0,x")
7289 (parallel [(const_int 0)]))
7290 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7292 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7293 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7296 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7297 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7299 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7300 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7305 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7306 (parallel [(const_int 0)]))
7307 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7309 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7310 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7313 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7314 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7316 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7317 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7320 phsubw\t{%2, %0|%0, %2}
7321 vphsubw\t{%2, %1, %0|%0, %1, %2}"
7322 [(set_attr "isa" "noavx,avx")
7323 (set_attr "type" "sseiadd")
7324 (set_attr "atom_unit" "complex")
7325 (set_attr "prefix_data16" "1,*")
7326 (set_attr "prefix_extra" "1")
7327 (set_attr "prefix" "orig,vex")
7328 (set_attr "mode" "TI")])
;; phsubw on MMX registers (V4HI, constraint "y"); operand 1 is tied to
;; the destination.
7330 (define_insn "ssse3_phsubwv4hi3"
7331 [(set (match_operand:V4HI 0 "register_operand" "=y")
7336 (match_operand:V4HI 1 "register_operand" "0")
7337 (parallel [(const_int 0)]))
7338 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7340 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7341 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7345 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7346 (parallel [(const_int 0)]))
7347 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7349 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7350 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7352 "phsubw\t{%2, %0|%0, %2}"
7353 [(set_attr "type" "sseiadd")
7354 (set_attr "atom_unit" "complex")
7355 (set_attr "prefix_extra" "1")
7356 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7357 (set_attr "mode" "DI")])
;; phsubd / vphsubd on XMM registers (V4SI).  The RTL selects elements
;; 0-3 of operand 1 and then of operand 2 one at a time (vec_select:SI),
;; combined pairwise.
7359 (define_insn "ssse3_phsubdv4si3"
7360 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7365 (match_operand:V4SI 1 "register_operand" "0,x")
7366 (parallel [(const_int 0)]))
7367 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7369 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7370 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7374 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7375 (parallel [(const_int 0)]))
7376 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7378 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7379 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7382 phsubd\t{%2, %0|%0, %2}
7383 vphsubd\t{%2, %1, %0|%0, %1, %2}"
7385 [(set_attr "isa" "noavx,avx")
7386 (set_attr "type" "sseiadd")
7387 (set_attr "atom_unit" "complex")
7388 (set_attr "prefix_data16" "1,*")
7389 (set_attr "prefix_extra" "1")
7390 (set_attr "prefix" "orig,vex")
7391 (set_attr "mode" "TI")])
;; phsubd on MMX registers (V2SI, constraint "y"); operand 1 is tied to
;; the destination.
7393 (define_insn "ssse3_phsubdv2si3"
7394 [(set (match_operand:V2SI 0 "register_operand" "=y")
7398 (match_operand:V2SI 1 "register_operand" "0")
7399 (parallel [(const_int 0)]))
7400 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7403 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
7404 (parallel [(const_int 0)]))
7405 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
7407 "phsubd\t{%2, %0|%0, %2}"
7408 [(set_attr "type" "sseiadd")
7409 (set_attr "atom_unit" "complex")
7410 (set_attr "prefix_extra" "1")
7411 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7412 (set_attr "mode" "DI")])
;; phsubsw / vphsubsw on XMM registers (V8HI).  Same element-selection
;; shape as ssse3_phsubwv8hi3; only the emitted mnemonic differs in the
;; lines visible here.
7414 (define_insn "ssse3_phsubswv8hi3"
7415 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7421 (match_operand:V8HI 1 "register_operand" "0,x")
7422 (parallel [(const_int 0)]))
7423 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7425 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7426 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7429 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7430 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7432 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7433 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7438 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7439 (parallel [(const_int 0)]))
7440 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7442 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7443 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7446 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7447 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7449 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7450 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7453 phsubsw\t{%2, %0|%0, %2}
7454 vphsubsw\t{%2, %1, %0|%0, %1, %2}"
7455 [(set_attr "isa" "noavx,avx")
7456 (set_attr "type" "sseiadd")
7457 (set_attr "atom_unit" "complex")
7458 (set_attr "prefix_data16" "1,*")
7459 (set_attr "prefix_extra" "1")
7460 (set_attr "prefix" "orig,vex")
7461 (set_attr "mode" "TI")])
;; phsubsw on MMX registers (V4HI, constraint "y"); operand 1 is tied to
;; the destination.
7463 (define_insn "ssse3_phsubswv4hi3"
7464 [(set (match_operand:V4HI 0 "register_operand" "=y")
7469 (match_operand:V4HI 1 "register_operand" "0")
7470 (parallel [(const_int 0)]))
7471 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7473 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7474 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7478 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7479 (parallel [(const_int 0)]))
7480 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7482 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7483 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7485 "phsubsw\t{%2, %0|%0, %2}"
7486 [(set_attr "type" "sseiadd")
7487 (set_attr "atom_unit" "complex")
7488 (set_attr "prefix_extra" "1")
7489 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7490 (set_attr "mode" "DI")])
;; pmaddubsw / vpmaddubsw on XMM registers: V16QI inputs, V8HI result.
;; The RTL takes V8QI sub-vectors of each input, one selection starting
;; at element 0 and one starting at element 1 (ending at 15).
7492 (define_insn "ssse3_pmaddubsw128"
7493 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7498 (match_operand:V16QI 1 "register_operand" "0,x")
7499 (parallel [(const_int 0)
7509 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
7510 (parallel [(const_int 0)
7520 (vec_select:V8QI (match_dup 1)
7521 (parallel [(const_int 1)
7530 (vec_select:V8QI (match_dup 2)
7531 (parallel [(const_int 1)
7538 (const_int 15)]))))))]
7541 pmaddubsw\t{%2, %0|%0, %2}
7542 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
7543 [(set_attr "isa" "noavx,avx")
7544 (set_attr "type" "sseiadd")
7545 (set_attr "atom_unit" "simul")
7546 (set_attr "prefix_data16" "1,*")
7547 (set_attr "prefix_extra" "1")
7548 (set_attr "prefix" "orig,vex")
7549 (set_attr "mode" "TI")])
;; pmaddubsw on MMX registers: V8QI inputs, V4HI result.  The RTL takes
;; V4QI sub-vectors of each input, one selection starting at element 0
;; and one starting at element 1 (ending at 7).
7551 (define_insn "ssse3_pmaddubsw"
7552 [(set (match_operand:V4HI 0 "register_operand" "=y")
7557 (match_operand:V8QI 1 "register_operand" "0")
7558 (parallel [(const_int 0)
7564 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
7565 (parallel [(const_int 0)
7571 (vec_select:V4QI (match_dup 1)
7572 (parallel [(const_int 1)
7577 (vec_select:V4QI (match_dup 2)
7578 (parallel [(const_int 1)
7581 (const_int 7)]))))))]
7583 "pmaddubsw\t{%2, %0|%0, %2}"
7584 [(set_attr "type" "sseiadd")
7585 (set_attr "atom_unit" "simul")
7586 (set_attr "prefix_extra" "1")
7587 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7588 (set_attr "mode" "DI")])
;; Expander for the V8HI pattern matched by *ssse3_pmulhrswv8hi3.  The
;; RTL includes an all-ones V8HI constant; the preparation statement
;; canonicalizes the operands for MULT via
;; ix86_fixup_binary_operands_no_copy.
7590 (define_expand "ssse3_pmulhrswv8hi3"
7591 [(set (match_operand:V8HI 0 "register_operand" "")
7598 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7600 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7602 (const_vector:V8HI [(const_int 1) (const_int 1)
7603 (const_int 1) (const_int 1)
7604 (const_int 1) (const_int 1)
7605 (const_int 1) (const_int 1)]))
7608 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmulhrsw / vpmulhrsw on XMM registers (V8HI).  Operand 1 is marked
;; commutative ("%"); the insn requires SSSE3 and operands accepted by
;; ix86_binary_operator_ok for MULT.
7610 (define_insn "*ssse3_pmulhrswv8hi3"
7611 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7618 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
7620 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
7622 (const_vector:V8HI [(const_int 1) (const_int 1)
7623 (const_int 1) (const_int 1)
7624 (const_int 1) (const_int 1)
7625 (const_int 1) (const_int 1)]))
7627 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
7629 pmulhrsw\t{%2, %0|%0, %2}
7630 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
7631 [(set_attr "isa" "noavx,avx")
7632 (set_attr "type" "sseimul")
7633 (set_attr "prefix_data16" "1,*")
7634 (set_attr "prefix_extra" "1")
7635 (set_attr "prefix" "orig,vex")
7636 (set_attr "mode" "TI")])
;; Expander for the V4HI pattern matched by *ssse3_pmulhrswv4hi3.  The
;; RTL includes an all-ones V4HI constant; the preparation statement
;; canonicalizes the operands for MULT via
;; ix86_fixup_binary_operands_no_copy.
7638 (define_expand "ssse3_pmulhrswv4hi3"
7639 [(set (match_operand:V4HI 0 "register_operand" "")
7646 (match_operand:V4HI 1 "nonimmediate_operand" ""))
7648 (match_operand:V4HI 2 "nonimmediate_operand" "")))
7650 (const_vector:V4HI [(const_int 1) (const_int 1)
7651 (const_int 1) (const_int 1)]))
7654 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; pmulhrsw on MMX registers (V4HI, constraint "y").  Operand 1 is
;; commutative and tied to the destination; the insn requires SSSE3 and
;; operands accepted by ix86_binary_operator_ok for MULT.
7656 (define_insn "*ssse3_pmulhrswv4hi3"
7657 [(set (match_operand:V4HI 0 "register_operand" "=y")
7664 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
7666 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
7668 (const_vector:V4HI [(const_int 1) (const_int 1)
7669 (const_int 1) (const_int 1)]))
7671 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
7672 "pmulhrsw\t{%2, %0|%0, %2}"
7673 [(set_attr "type" "sseimul")
7674 (set_attr "prefix_extra" "1")
7675 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7676 (set_attr "mode" "DI")])
;; pshufb / vpshufb on XMM registers (V16QI), modelled as an unspec of
;; the two inputs.  Alternative 0 ties operand 1 to the destination;
;; alternative 1 is the three-operand VEX form.
7678 (define_insn "ssse3_pshufbv16qi3"
7679 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7680 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,x")
7681 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")]
7685 pshufb\t{%2, %0|%0, %2}
7686 vpshufb\t{%2, %1, %0|%0, %1, %2}"
7687 [(set_attr "isa" "noavx,avx")
7688 (set_attr "type" "sselog1")
7689 (set_attr "prefix_data16" "1,*")
7690 (set_attr "prefix_extra" "1")
7691 (set_attr "prefix" "orig,vex")
7692 (set_attr "mode" "TI")])
;; pshufb on MMX registers (V8QI, constraint "y"), modelled as an unspec
;; of the two inputs; operand 1 is tied to the destination.  prefix_rex
;; is computed per insn from whether an extended register is mentioned.
7694 (define_insn "ssse3_pshufbv8qi3"
7695 [(set (match_operand:V8QI 0 "register_operand" "=y")
7696 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
7697 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
7700 "pshufb\t{%2, %0|%0, %2}"
7701 [(set_attr "type" "sselog1")
7702 (set_attr "prefix_extra" "1")
7703 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7704 (set_attr "mode" "DI")])
;; psignb/w/d and their VEX forms on XMM registers, one insn per mode of
;; the VI124_128 iterator; the mnemonic suffix comes from
;; <ssemodesuffix>.  Modelled as an unspec of the two inputs.
7706 (define_insn "ssse3_psign<mode>3"
7707 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
7709 [(match_operand:VI124_128 1 "register_operand" "0,x")
7710 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")]
7714 psign<ssemodesuffix>\t{%2, %0|%0, %2}
7715 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7716 [(set_attr "isa" "noavx,avx")
7717 (set_attr "type" "sselog1")
7718 (set_attr "prefix_data16" "1,*")
7719 (set_attr "prefix_extra" "1")
7720 (set_attr "prefix" "orig,vex")
7721 (set_attr "mode" "TI")])
;; psign on MMX registers, one insn per mode of the MMXMODEI iterator;
;; the mnemonic suffix comes from <mmxvecsize>.  Operand 1 is tied to
;; the destination.  prefix_rex is computed per insn from whether an
;; extended register is mentioned.
7723 (define_insn "ssse3_psign<mode>3"
7724 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
7726 [(match_operand:MMXMODEI 1 "register_operand" "0")
7727 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
7730 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
7731 [(set_attr "type" "sselog1")
7732 (set_attr "prefix_extra" "1")
7733 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7734 (set_attr "mode" "DI")])
;; palignr on TImode values.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand and is divided by 8 before being printed
;; (presumably a bit count converted to the byte count the instruction
;; encodes -- confirm).  The output code switches on which_alternative to
;; return either the two-operand palignr or the three-operand vpalignr.
7736 (define_insn "ssse3_palignrti"
7737 [(set (match_operand:TI 0 "register_operand" "=x,x")
7738 (unspec:TI [(match_operand:TI 1 "register_operand" "0,x")
7739 (match_operand:TI 2 "nonimmediate_operand" "xm,xm")
7740 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
7744 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
7746 switch (which_alternative)
7749 return "palignr\t{%3, %2, %0|%0, %2, %3}";
7751 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7756 [(set_attr "isa" "noavx,avx")
7757 (set_attr "type" "sseishft")
7758 (set_attr "atom_unit" "sishuf")
7759 (set_attr "prefix_data16" "1,*")
7760 (set_attr "prefix_extra" "1")
7761 (set_attr "length_immediate" "1")
7762 (set_attr "prefix" "orig,vex")
7763 (set_attr "mode" "TI")])
;; palignr on DImode values in MMX registers (constraint y).  Operand 0 is
;; tied to operand 1.  Operand 3 must satisfy const_0_to_255_mul_8_operand
;; and is divided by 8 before the two-operand palignr template is returned.
7765 (define_insn "ssse3_palignrdi"
7766 [(set (match_operand:DI 0 "register_operand" "=y")
7767 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
7768 (match_operand:DI 2 "nonimmediate_operand" "ym")
7769 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
7773 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
7774 return "palignr\t{%3, %2, %0|%0, %2, %3}";
7776 [(set_attr "type" "sseishft")
7777 (set_attr "atom_unit" "sishuf")
7778 (set_attr "prefix_extra" "1")
7779 (set_attr "length_immediate" "1")
7780 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7781 (set_attr "mode" "DI")])
;; Standard-named abs<mode>2 over the VI124_128 mode iterator.  Single
;; alternative: register destination, register-or-memory source.  Emits
;; pabs<ssemodesuffix>; the template carries the %v prefix and the prefix
;; attribute is maybe_vex.
7783 (define_insn "abs<mode>2"
7784 [(set (match_operand:VI124_128 0 "register_operand" "=x")
7786 (match_operand:VI124_128 1 "nonimmediate_operand" "xm")))]
7788 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
7789 [(set_attr "type" "sselog1")
7790 (set_attr "prefix_data16" "1")
7791 (set_attr "prefix_extra" "1")
7792 (set_attr "prefix" "maybe_vex")
7793 (set_attr "mode" "TI")])
;; abs<mode>2 over the MMXMODEI mode iterator, in MMX registers
;; (constraint y).  Register destination, register-or-memory source.
;; Emits pabs<mmxvecsize>.  prefix_rep is explicitly set to 0.
7795 (define_insn "abs<mode>2"
7796 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
7798 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
7800 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
7801 [(set_attr "type" "sselog1")
7802 (set_attr "prefix_rep" "0")
7803 (set_attr "prefix_extra" "1")
7804 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7805 (set_attr "mode" "DI")])
7807 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7809 ;; AMD SSE4A instructions
7811 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Store of a MODEF value from an SSE register (operand 1) to memory
;; (operand 0).  Emits movnt<ssemodesuffix>.
7813 (define_insn "sse4a_movnt<mode>"
7814 [(set (match_operand:MODEF 0 "memory_operand" "=m")
7816 [(match_operand:MODEF 1 "register_operand" "x")]
7819 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
7820 [(set_attr "type" "ssemov")
7821 (set_attr "mode" "<MODE>")])
;; Store of element 0 of a VF_128 register (selected with vec_select) to a
;; scalar-mode memory operand.  Emits movnt<ssescalarmodesuffix>.
7823 (define_insn "sse4a_vmmovnt<mode>"
7824 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
7825 (unspec:<ssescalarmode>
7826 [(vec_select:<ssescalarmode>
7827 (match_operand:VF_128 1 "register_operand" "x")
7828 (parallel [(const_int 0)]))]
7831 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
7832 [(set_attr "type" "ssemov")
7833 (set_attr "mode" "<ssescalarmode>")])
;; extrq, immediate form.  Operand 0 is tied to operand 1; operands 2 and 3
;; are mode-less const_int operands with empty constraints.  The two
;; immediates are accounted for by length_immediate "2".
7835 (define_insn "sse4a_extrqi"
7836 [(set (match_operand:V2DI 0 "register_operand" "=x")
7837 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7838 (match_operand 2 "const_int_operand" "")
7839 (match_operand 3 "const_int_operand" "")]
7842 "extrq\t{%3, %2, %0|%0, %2, %3}"
7843 [(set_attr "type" "sse")
7844 (set_attr "prefix_data16" "1")
7845 (set_attr "length_immediate" "2")
7846 (set_attr "mode" "TI")])
;; extrq, register form.  Operand 0 is tied to operand 1; operand 2 is a
;; V16QI register.
7848 (define_insn "sse4a_extrq"
7849 [(set (match_operand:V2DI 0 "register_operand" "=x")
7850 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7851 (match_operand:V16QI 2 "register_operand" "x")]
7854 "extrq\t{%2, %0|%0, %2}"
7855 [(set_attr "type" "sse")
7856 (set_attr "prefix_data16" "1")
7857 (set_attr "mode" "TI")])
;; insertq, immediate form.  Operand 0 is tied to operand 1; operand 2 is a
;; V2DI register; operands 3 and 4 are mode-less const_int operands.
;; Attributes: prefix_data16 "0", prefix_rep "1", length_immediate "2".
7859 (define_insn "sse4a_insertqi"
7860 [(set (match_operand:V2DI 0 "register_operand" "=x")
7861 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7862 (match_operand:V2DI 2 "register_operand" "x")
7863 (match_operand 3 "const_int_operand" "")
7864 (match_operand 4 "const_int_operand" "")]
7867 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
7868 [(set_attr "type" "sseins")
7869 (set_attr "prefix_data16" "0")
7870 (set_attr "prefix_rep" "1")
7871 (set_attr "length_immediate" "2")
7872 (set_attr "mode" "TI")])
;; insertq, register form.  Operand 0 is tied to operand 1; operand 2 is a
;; V2DI register.  Attributes: prefix_data16 "0", prefix_rep "1".
7874 (define_insn "sse4a_insertq"
7875 [(set (match_operand:V2DI 0 "register_operand" "=x")
7876 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7877 (match_operand:V2DI 2 "register_operand" "x")]
7880 "insertq\t{%2, %0|%0, %2}"
7881 [(set_attr "type" "sseins")
7882 (set_attr "prefix_data16" "0")
7883 (set_attr "prefix_rep" "1")
7884 (set_attr "mode" "TI")])
7886 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7888 ;; Intel SSE4.1 instructions
7890 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Immediate-controlled blend over the VF mode iterator.  Note the RTL
;; lists operand 2 before operand 1.  Operand 3 is an immediate checked by
;; const_0_to_<blendbits>_operand.  Alternative 0 (noavx) ties operand 0 to
;; operand 1 and emits blend<ssemodesuffix>; alternative 1 (avx) emits the
;; four-operand vblend<ssemodesuffix>.
7892 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
7893 [(set (match_operand:VF 0 "register_operand" "=x,x")
7895 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
7896 (match_operand:VF 1 "register_operand" "0,x")
7897 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n,n")))]
7900 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
7901 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7902 [(set_attr "isa" "noavx,avx")
7903 (set_attr "type" "ssemov")
7904 (set_attr "length_immediate" "1")
7905 (set_attr "prefix_data16" "1,*")
7906 (set_attr "prefix_extra" "1")
7907 (set_attr "prefix" "orig,vex")
7908 (set_attr "mode" "<MODE>")])
;; Register-controlled blend over the VF mode iterator.  Operand 3 is the
;; selector register: constraint Yz in the noavx alternative, any SSE
;; register (x) in the avx alternative.  Operands 0-2 use the
;; *_not_xmm0_*_maybe_avx predicates.
;; NOTE(review): length_immediate is "1" for both alternatives here, while
;; sse4_1_pblendvb uses "*,1"; the noavx template takes a register as
;; operand 3, not an immediate -- confirm which setting is intended.
7910 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
7911 [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
7913 [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
7914 (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
7915 (match_operand:VF 3 "register_operand" "Yz,x")]
7919 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
7920 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7921 [(set_attr "isa" "noavx,avx")
7922 (set_attr "type" "ssemov")
7923 (set_attr "length_immediate" "1")
7924 (set_attr "prefix_data16" "1,*")
7925 (set_attr "prefix_extra" "1")
7926 (set_attr "prefix" "orig,vex")
7927 (set_attr "mode" "<MODE>")])
;; dp<ssemodesuffix> / vdp<ssemodesuffix> over the VF mode iterator.
;; Operand 1 is marked commutative with operand 2 ("%"); operand 3 is an
;; 8-bit immediate (const_0_to_255_operand).  Alternative 0 (noavx) ties
;; operand 0 to operand 1.
7929 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
7930 [(set (match_operand:VF 0 "register_operand" "=x,x")
7932 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
7933 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
7934 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
7938 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
7939 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7940 [(set_attr "isa" "noavx,avx")
7941 (set_attr "type" "ssemul")
7942 (set_attr "length_immediate" "1")
7943 (set_attr "prefix_data16" "1,*")
7944 (set_attr "prefix_extra" "1")
7945 (set_attr "prefix" "orig,vex")
7946 (set_attr "mode" "<MODE>")])
;; movntdqa: load of a V2DI value from memory (operand 1, memory only) into
;; an SSE register.  The template carries the %v prefix and the prefix
;; attribute is maybe_vex.
7948 (define_insn "sse4_1_movntdqa"
7949 [(set (match_operand:V2DI 0 "register_operand" "=x")
7950 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
7953 "%vmovntdqa\t{%1, %0|%0, %1}"
7954 [(set_attr "type" "ssemov")
7955 (set_attr "prefix_extra" "1")
7956 (set_attr "prefix" "maybe_vex")
7957 (set_attr "mode" "TI")])
;; mpsadbw / vmpsadbw on V16QI with an 8-bit immediate (operand 3).
;; Alternative 0 (noavx) ties operand 0 to operand 1; alternative 1 (avx)
;; is the four-operand VEX form.
7959 (define_insn "sse4_1_mpsadbw"
7960 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7961 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,x")
7962 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
7963 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
7967 mpsadbw\t{%3, %2, %0|%0, %2, %3}
7968 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7969 [(set_attr "isa" "noavx,avx")
7970 (set_attr "type" "sselog1")
7971 (set_attr "length_immediate" "1")
7972 (set_attr "prefix_extra" "1")
7973 (set_attr "prefix" "orig,vex")
7974 (set_attr "mode" "TI")])
;; packusdw / vpackusdw: two V4SI inputs (operand 1 a register, operand 2 a
;; register or memory) produce one V8HI result.  Alternative 0 (noavx)
;; ties operand 0 to operand 1.
7976 (define_insn "sse4_1_packusdw"
7977 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7980 (match_operand:V4SI 1 "register_operand" "0,x"))
7982 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
7985 packusdw\t{%2, %0|%0, %2}
7986 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
7987 [(set_attr "isa" "noavx,avx")
7988 (set_attr "type" "sselog")
7989 (set_attr "prefix_extra" "1")
7990 (set_attr "prefix" "orig,vex")
7991 (set_attr "mode" "TI")])
;; pblendvb / vpblendvb on V16QI.  Operand 3 is the selector register:
;; constraint Yz in the noavx alternative, any SSE register (x) in the avx
;; alternative.  length_immediate is "*,1", i.e. only set for the VEX form.
;; NOTE(review): operand 0 uses reg_not_xmm0_operand while operand 1 (and
;; operand 0 of the blendv pattern) use reg_not_xmm0_operand_maybe_avx --
;; confirm whether the stricter predicate on the destination is intended.
7993 (define_insn "sse4_1_pblendvb"
7994 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x,x")
7996 [(match_operand:V16QI 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
7997 (match_operand:V16QI 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
7998 (match_operand:V16QI 3 "register_operand" "Yz,x")]
8002 pblendvb\t{%3, %2, %0|%0, %2, %3}
8003 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8004 [(set_attr "isa" "noavx,avx")
8005 (set_attr "type" "ssemov")
8006 (set_attr "prefix_extra" "1")
8007 (set_attr "length_immediate" "*,1")
8008 (set_attr "prefix" "orig,vex")
8009 (set_attr "mode" "TI")])
;; pblendw / vpblendw on V8HI with an 8-bit immediate (operand 3).  Note the
;; RTL lists operand 2 before operand 1.  Alternative 0 (noavx) ties
;; operand 0 to operand 1.
8011 (define_insn "sse4_1_pblendw"
8012 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8014 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8015 (match_operand:V8HI 1 "register_operand" "0,x")
8016 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
8019 pblendw\t{%3, %2, %0|%0, %2, %3}
8020 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8021 [(set_attr "isa" "noavx,avx")
8022 (set_attr "type" "ssemov")
8023 (set_attr "prefix_extra" "1")
8024 (set_attr "length_immediate" "1")
8025 (set_attr "prefix" "orig,vex")
8026 (set_attr "mode" "TI")])
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW.  Register destination,
;; register-or-memory source.  The template carries the %v prefix and the
;; prefix attribute is maybe_vex.
8028 (define_insn "sse4_1_phminposuw"
8029 [(set (match_operand:V8HI 0 "register_operand" "=x")
8030 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
8031 UNSPEC_PHMINPOSUW))]
8033 "%vphminposuw\t{%1, %0|%0, %1}"
8034 [(set_attr "type" "sselog1")
8035 (set_attr "prefix_extra" "1")
8036 (set_attr "prefix" "maybe_vex")
8037 (set_attr "mode" "TI")])
;; pmov<extsuffix>bw: V16QI source (register or memory), V8HI destination.
;; The source elements are picked by a vec_select whose index list starts
;; at 0.  <code>/<extsuffix> come from a code iterator defined elsewhere.
8039 (define_insn "sse4_1_<code>v8qiv8hi2"
8040 [(set (match_operand:V8HI 0 "register_operand" "=x")
8043 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8044 (parallel [(const_int 0)
8053 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
8054 [(set_attr "type" "ssemov")
8055 (set_attr "prefix_extra" "1")
8056 (set_attr "prefix" "maybe_vex")
8057 (set_attr "mode" "TI")])
;; pmov<extsuffix>bd: V16QI source (register or memory), V4SI destination.
;; The source elements are picked by a vec_select whose index list starts
;; at 0.
8059 (define_insn "sse4_1_<code>v4qiv4si2"
8060 [(set (match_operand:V4SI 0 "register_operand" "=x")
8063 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8064 (parallel [(const_int 0)
8069 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
8070 [(set_attr "type" "ssemov")
8071 (set_attr "prefix_extra" "1")
8072 (set_attr "prefix" "maybe_vex")
8073 (set_attr "mode" "TI")])
;; pmov<extsuffix>wd: V8HI source (register or memory), V4SI destination.
;; The source elements are picked by a vec_select whose index list starts
;; at 0.
8075 (define_insn "sse4_1_<code>v4hiv4si2"
8076 [(set (match_operand:V4SI 0 "register_operand" "=x")
8079 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8080 (parallel [(const_int 0)
8085 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
8086 [(set_attr "type" "ssemov")
8087 (set_attr "prefix_extra" "1")
8088 (set_attr "prefix" "maybe_vex")
8089 (set_attr "mode" "TI")])
;; pmov<extsuffix>bq: V16QI source (register or memory), V2DI destination.
;; The source elements are picked by a vec_select whose index list starts
;; at 0.
8091 (define_insn "sse4_1_<code>v2qiv2di2"
8092 [(set (match_operand:V2DI 0 "register_operand" "=x")
8095 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8096 (parallel [(const_int 0)
8099 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
8100 [(set_attr "type" "ssemov")
8101 (set_attr "prefix_extra" "1")
8102 (set_attr "prefix" "maybe_vex")
8103 (set_attr "mode" "TI")])
;; pmov<extsuffix>wq: V8HI source (register or memory), V2DI destination.
;; The source elements are picked by a vec_select whose index list starts
;; at 0.
8105 (define_insn "sse4_1_<code>v2hiv2di2"
8106 [(set (match_operand:V2DI 0 "register_operand" "=x")
8109 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8110 (parallel [(const_int 0)
8113 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
8114 [(set_attr "type" "ssemov")
8115 (set_attr "prefix_extra" "1")
8116 (set_attr "prefix" "maybe_vex")
8117 (set_attr "mode" "TI")])
;; pmov<extsuffix>dq: V4SI source (register or memory), V2DI destination.
;; The source elements are picked by a vec_select whose index list starts
;; at 0.
8119 (define_insn "sse4_1_<code>v2siv2di2"
8120 [(set (match_operand:V2DI 0 "register_operand" "=x")
8123 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
8124 (parallel [(const_int 0)
8127 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
8128 [(set_attr "type" "ssemov")
8129 (set_attr "prefix_extra" "1")
8130 (set_attr "prefix" "maybe_vex")
8131 (set_attr "mode" "TI")])
8133 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
8134 ;; setting FLAGS_REG, but they are not really compare instructions.
;; Sets FLAGS_REG (CC mode) from an unspec of two VF operands: operand 0 a
;; register, operand 1 a register or memory.  Emits vtest<ssemodesuffix>;
;; VEX-only (prefix "vex").
8135 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
8136 [(set (reg:CC FLAGS_REG)
8137 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
8138 (match_operand:VF 1 "nonimmediate_operand" "xm")]
8141 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
8142 [(set_attr "type" "ssecomi")
8143 (set_attr "prefix_extra" "1")
8144 (set_attr "prefix" "vex")
8145 (set_attr "mode" "<MODE>")])
8147 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
8148 ;; but it is not really a compare instruction.
;; 256-bit vptest: sets FLAGS_REG (CC mode) from two V4DI operands, operand 0
;; a register and operand 1 a register or memory.  VEX-only; mode OI.
8149 (define_insn "avx_ptest256"
8150 [(set (reg:CC FLAGS_REG)
8151 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
8152 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
8155 "vptest\t{%1, %0|%0, %1}"
8156 [(set_attr "type" "ssecomi")
8157 (set_attr "prefix_extra" "1")
8158 (set_attr "prefix" "vex")
8159 (set_attr "mode" "OI")])
;; 128-bit ptest: sets FLAGS_REG (CC mode) from two V2DI operands, operand 0
;; a register and operand 1 a register or memory.  The template carries the
;; %v prefix and the prefix attribute is maybe_vex.
8161 (define_insn "sse4_1_ptest"
8162 [(set (reg:CC FLAGS_REG)
8163 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
8164 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
8167 "%vptest\t{%1, %0|%0, %1}"
8168 [(set_attr "type" "ssecomi")
8169 (set_attr "prefix_extra" "1")
8170 (set_attr "prefix" "maybe_vex")
8171 (set_attr "mode" "TI")])
;; Packed round over the VF mode iterator.  Operand 1 is a register or
;; memory; operand 2 is an immediate in 0..15 (const_0_to_15_operand).
;; prefix_data16 is computed from a condition on TARGET_AVX rather than
;; given as a constant.
8173 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
8174 [(set (match_operand:VF 0 "register_operand" "=x")
8176 [(match_operand:VF 1 "nonimmediate_operand" "xm")
8177 (match_operand:SI 2 "const_0_to_15_operand" "n")]
8180 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8181 [(set_attr "type" "ssecvt")
8182 (set (attr "prefix_data16")
8184 (ne (symbol_ref "TARGET_AVX") (const_int 0))
8186 (const_string "1")))
8187 (set_attr "prefix_extra" "1")
8188 (set_attr "length_immediate" "1")
8189 (set_attr "prefix" "maybe_vex")
8190 (set_attr "mode" "<MODE>")])
;; Scalar round over VF_128.  Operand 2 is the register being rounded with
;; immediate operand 3 (0..15); operand 1 is a second register input that
;; is tied to operand 0 in the noavx alternative.  Emits
;; round<ssescalarmodesuffix> or the four-operand VEX form.
8192 (define_insn "sse4_1_round<ssescalarmodesuffix>"
8193 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
8196 [(match_operand:VF_128 2 "register_operand" "x,x")
8197 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
8199 (match_operand:VF_128 1 "register_operand" "0,x")
8203 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
8204 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8205 [(set_attr "isa" "noavx,avx")
8206 (set_attr "type" "ssecvt")
8207 (set_attr "length_immediate" "1")
8208 (set_attr "prefix_data16" "1,*")
8209 (set_attr "prefix_extra" "1")
8210 (set_attr "prefix" "orig,vex")
8211 (set_attr "mode" "<MODE>")])
8213 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8215 ;; Intel SSE4.2 string/text processing instructions
8217 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI value in operand 0
;; (constraint c), a V16QI value in operand 1 (constraint Yz) and FLAGS_REG.
;; Inputs are operands 2-6; the length operands 3 and 5 use constraints a
;; and d.  The split condition requires can_create_pseudo_p ().
;; The split code looks up REG_UNUSED notes on curr_insn for each of the
;; three results and emits the pcmpestri, pcmpestrm and/or _cconly insns;
;; the _cconly form is used when the flags are live but neither register
;; result is (flags && !(ecx || xmm0)).
8219 (define_insn_and_split "sse4_2_pcmpestr"
8220 [(set (match_operand:SI 0 "register_operand" "=c,c")
8222 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
8223 (match_operand:SI 3 "register_operand" "a,a")
8224 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
8225 (match_operand:SI 5 "register_operand" "d,d")
8226 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
8228 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
8236 (set (reg:CC FLAGS_REG)
8245 && can_create_pseudo_p ()"
8250 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
8251 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
8252 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
8255 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
8256 operands[3], operands[4],
8257 operands[5], operands[6]));
8259 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
8260 operands[3], operands[4],
8261 operands[5], operands[6]));
8262 if (flags && !(ecx || xmm0))
8263 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
8264 operands[2], operands[3],
8265 operands[4], operands[5],
8269 [(set_attr "type" "sselog")
8270 (set_attr "prefix_data16" "1")
8271 (set_attr "prefix_extra" "1")
8272 (set_attr "length_immediate" "1")
8273 (set_attr "memory" "none,load")
8274 (set_attr "mode" "TI")])
;; pcmpestri: SI result in operand 0 (constraint c) plus FLAGS_REG.
;; Operands 2 and 4 are the length registers (constraints a and d);
;; operand 3 is a register (alternative 0) or memory (alternative 1), which
;; the "memory" attribute reflects as "none,load".
8276 (define_insn "sse4_2_pcmpestri"
8277 [(set (match_operand:SI 0 "register_operand" "=c,c")
8279 [(match_operand:V16QI 1 "register_operand" "x,x")
8280 (match_operand:SI 2 "register_operand" "a,a")
8281 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
8282 (match_operand:SI 4 "register_operand" "d,d")
8283 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
8285 (set (reg:CC FLAGS_REG)
8294 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
8295 [(set_attr "type" "sselog")
8296 (set_attr "prefix_data16" "1")
8297 (set_attr "prefix_extra" "1")
8298 (set_attr "prefix" "maybe_vex")
8299 (set_attr "length_immediate" "1")
8300 (set_attr "memory" "none,load")
8301 (set_attr "mode" "TI")])
;; pcmpestrm: V16QI result in operand 0 (constraint Yz) plus FLAGS_REG.
;; Operands 2 and 4 are the length registers (constraints a and d);
;; operand 3 is a register (alternative 0) or memory (alternative 1).
8303 (define_insn "sse4_2_pcmpestrm"
8304 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
8306 [(match_operand:V16QI 1 "register_operand" "x,x")
8307 (match_operand:SI 2 "register_operand" "a,a")
8308 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
8309 (match_operand:SI 4 "register_operand" "d,d")
8310 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
8312 (set (reg:CC FLAGS_REG)
8321 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
8322 [(set_attr "type" "sselog")
8323 (set_attr "prefix_data16" "1")
8324 (set_attr "prefix_extra" "1")
8325 (set_attr "length_immediate" "1")
8326 (set_attr "prefix" "maybe_vex")
8327 (set_attr "memory" "none,load")
8328 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: sets FLAGS_REG and clobbers two scratches.
;; Alternatives 0-1 emit pcmpestrm and need the V16QI scratch in Yz (the SI
;; scratch is X, i.e. unused); alternatives 2-3 emit pcmpestri and need the
;; SI scratch in c.  Within each pair operand 4 is a register, then memory.
8330 (define_insn "sse4_2_pcmpestr_cconly"
8331 [(set (reg:CC FLAGS_REG)
8333 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
8334 (match_operand:SI 3 "register_operand" "a,a,a,a")
8335 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
8336 (match_operand:SI 5 "register_operand" "d,d,d,d")
8337 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
8339 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
8340 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
8343 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
8344 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
8345 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
8346 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
8347 [(set_attr "type" "sselog")
8348 (set_attr "prefix_data16" "1")
8349 (set_attr "prefix_extra" "1")
8350 (set_attr "length_immediate" "1")
8351 (set_attr "memory" "none,load,none,load")
8352 (set_attr "prefix" "maybe_vex")
8353 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: an SI value in operand 0
;; (constraint c), a V16QI value in operand 1 (constraint Yz) and FLAGS_REG.
;; Inputs are operands 2-4.  The split condition requires
;; can_create_pseudo_p ().
;; The split code looks up REG_UNUSED notes on curr_insn for each of the
;; three results and emits the pcmpistri, pcmpistrm and/or _cconly insns;
;; the _cconly form is used when the flags are live but neither register
;; result is (flags && !(ecx || xmm0)).
8355 (define_insn_and_split "sse4_2_pcmpistr"
8356 [(set (match_operand:SI 0 "register_operand" "=c,c")
8358 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
8359 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
8360 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
8362 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
8368 (set (reg:CC FLAGS_REG)
8375 && can_create_pseudo_p ()"
8380 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
8381 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
8382 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
8385 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
8386 operands[3], operands[4]));
8388 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
8389 operands[3], operands[4]));
8390 if (flags && !(ecx || xmm0))
8391 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
8392 operands[2], operands[3],
8396 [(set_attr "type" "sselog")
8397 (set_attr "prefix_data16" "1")
8398 (set_attr "prefix_extra" "1")
8399 (set_attr "length_immediate" "1")
8400 (set_attr "memory" "none,load")
8401 (set_attr "mode" "TI")])
;; pcmpistri: SI result in operand 0 (constraint c) plus FLAGS_REG.
;; Operand 2 is a register (alternative 0) or memory (alternative 1), which
;; the "memory" attribute reflects as "none,load".
8403 (define_insn "sse4_2_pcmpistri"
8404 [(set (match_operand:SI 0 "register_operand" "=c,c")
8406 [(match_operand:V16QI 1 "register_operand" "x,x")
8407 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
8408 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
8410 (set (reg:CC FLAGS_REG)
8417 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
8418 [(set_attr "type" "sselog")
8419 (set_attr "prefix_data16" "1")
8420 (set_attr "prefix_extra" "1")
8421 (set_attr "length_immediate" "1")
8422 (set_attr "prefix" "maybe_vex")
8423 (set_attr "memory" "none,load")
8424 (set_attr "mode" "TI")])
;; pcmpistrm: V16QI result in operand 0 (constraint Yz) plus FLAGS_REG.
;; Operand 2 is a register (alternative 0) or memory (alternative 1).
8426 (define_insn "sse4_2_pcmpistrm"
8427 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
8429 [(match_operand:V16QI 1 "register_operand" "x,x")
8430 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
8431 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
8433 (set (reg:CC FLAGS_REG)
8440 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
8441 [(set_attr "type" "sselog")
8442 (set_attr "prefix_data16" "1")
8443 (set_attr "prefix_extra" "1")
8444 (set_attr "length_immediate" "1")
8445 (set_attr "prefix" "maybe_vex")
8446 (set_attr "memory" "none,load")
8447 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: sets FLAGS_REG and clobbers two scratches.
;; Alternatives 0-1 emit pcmpistrm and need the V16QI scratch in Yz (the SI
;; scratch is X, i.e. unused); alternatives 2-3 emit pcmpistri and need the
;; SI scratch in c.  Within each pair operand 3 is a register, then memory.
8449 (define_insn "sse4_2_pcmpistr_cconly"
8450 [(set (reg:CC FLAGS_REG)
8452 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
8453 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
8454 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
8456 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
8457 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
8460 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
8461 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
8462 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
8463 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
8464 [(set_attr "type" "sselog")
8465 (set_attr "prefix_data16" "1")
8466 (set_attr "prefix_extra" "1")
8467 (set_attr "length_immediate" "1")
8468 (set_attr "memory" "none,load,none,load")
8469 (set_attr "prefix" "maybe_vex")
8470 (set_attr "mode" "TI")])
8472 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8476 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8478 ;; XOP parallel integer multiply/add instructions.
8479 ;; Note the XOP multiply/add instructions
8480 ;; a[i] = b[i] * c[i] + d[i];
8481 ;; do not allow the value being added to be a memory operand.
;; vpmacsww on V8HI.  Operands 1 and 2 are the commutative ("%") multiply
;; inputs, operand 2 optionally in memory.  Operand 3 has the
;; nonimmediate_operand predicate but its constraint (x) only accepts a
;; register.
8482 (define_insn "xop_pmacsww"
8483 [(set (match_operand:V8HI 0 "register_operand" "=x")
8486 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8487 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
8488 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
8490 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8491 [(set_attr "type" "ssemuladd")
8492 (set_attr "mode" "TI")])
;; vpmacssww on V8HI: a V8HI mult of operands 1 and 2 combined with
;; operand 3.  Operand 2 may be in memory; operand 3 is register-only by
;; constraint.
8494 (define_insn "xop_pmacssww"
8495 [(set (match_operand:V8HI 0 "register_operand" "=x")
8497 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8498 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
8499 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
8501 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8502 [(set_attr "type" "ssemuladd")
8503 (set_attr "mode" "TI")])
;; vpmacsdd on V4SI.  Operands 1 and 2 are the commutative ("%") multiply
;; inputs, operand 2 optionally in memory; operand 3 is register-only by
;; constraint.
8505 (define_insn "xop_pmacsdd"
8506 [(set (match_operand:V4SI 0 "register_operand" "=x")
8509 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8510 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
8511 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8513 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8514 [(set_attr "type" "ssemuladd")
8515 (set_attr "mode" "TI")])
;; vpmacssdd on V4SI: a V4SI mult of operands 1 and 2 combined with
;; operand 3.  Operand 2 may be in memory; operand 3 is register-only by
;; constraint.
8517 (define_insn "xop_pmacssdd"
8518 [(set (match_operand:V4SI 0 "register_operand" "=x")
8520 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8521 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
8522 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8524 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8525 [(set_attr "type" "ssemuladd")
8526 (set_attr "mode" "TI")])
;; vpmacssdql: V4SI operands 1 and 2 are each narrowed by a selection whose
;; index list starts at 1; operand 3 and the result are V2DI.
8528 (define_insn "xop_pmacssdql"
8529 [(set (match_operand:V2DI 0 "register_operand" "=x")
8534 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8535 (parallel [(const_int 1)
8538 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8539 (parallel [(const_int 1)
8541 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
8543 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8544 [(set_attr "type" "ssemuladd")
8545 (set_attr "mode" "TI")])
;; vpmacssdqh: V4SI operands 1 and 2 are each narrowed by a selection whose
;; index list starts at 0; operand 3 and the result are V2DI.
8547 (define_insn "xop_pmacssdqh"
8548 [(set (match_operand:V2DI 0 "register_operand" "=x")
8553 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8554 (parallel [(const_int 0)
8558 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8559 (parallel [(const_int 0)
8561 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
8563 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8564 [(set_attr "type" "ssemuladd")
8565 (set_attr "mode" "TI")])
;; vpmacsdql: V4SI operands 1 and 2 are each narrowed by a selection whose
;; index list starts at 1; operand 3 and the result are V2DI.
8567 (define_insn "xop_pmacsdql"
8568 [(set (match_operand:V2DI 0 "register_operand" "=x")
8573 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8574 (parallel [(const_int 1)
8578 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8579 (parallel [(const_int 1)
8581 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
8583 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8584 [(set_attr "type" "ssemuladd")
8585 (set_attr "mode" "TI")])
8587 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
8588 ;; fake it with a multiply/add. In general, we expect the define_split to
8589 ;; occur before register allocation, so we have to handle the corner case where
8590 ;; the target is the same as operands 1/2
;; Multiply of elements 1 and 3 of two V4SI operands into a V2DI result.
;; The destination is earlyclobber ("=&x").  The pattern is split once
;; reload_completed holds; the preparation statement sets operands[3] to
;; the V2DI zero constant (presumably the addend of the multiply/add
;; replacement described in the comment above -- confirm against the full
;; replacement pattern).
8591 (define_insn_and_split "xop_mulv2div2di3_low"
8592 [(set (match_operand:V2DI 0 "register_operand" "=&x")
8596 (match_operand:V4SI 1 "register_operand" "%x")
8597 (parallel [(const_int 1)
8601 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8602 (parallel [(const_int 1)
8603 (const_int 3)])))))]
8606 "&& reload_completed"
8615 (parallel [(const_int 1)
8620 (parallel [(const_int 1)
8624 operands[3] = CONST0_RTX (V2DImode);
8626 [(set_attr "type" "ssemul")
8627 (set_attr "mode" "TI")])
;; vpmacsdqh: V4SI operands 1 and 2 are each narrowed by a selection whose
;; index list starts at 0; operand 3 and the result are V2DI.
8629 (define_insn "xop_pmacsdqh"
8630 [(set (match_operand:V2DI 0 "register_operand" "=x")
8635 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8636 (parallel [(const_int 0)
8640 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8641 (parallel [(const_int 0)
8643 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
8645 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8646 [(set_attr "type" "ssemuladd")
8647 (set_attr "mode" "TI")])
8649 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
8650 ;; fake it with a multiply/add. In general, we expect the define_split to
8651 ;; occur before register allocation, so we have to handle the corner case where
8652 ;; the target is the same as either operands[1] or operands[2]
;; Multiply of elements 0 and 2 of two V4SI operands into a V2DI result.
;; The destination is earlyclobber ("=&x").  The pattern is split once
;; reload_completed holds; the preparation statement sets operands[3] to
;; the V2DI zero constant (presumably the addend of the multiply/add
;; replacement described in the comment above -- confirm against the full
;; replacement pattern).
8653 (define_insn_and_split "xop_mulv2div2di3_high"
8654 [(set (match_operand:V2DI 0 "register_operand" "=&x")
8658 (match_operand:V4SI 1 "register_operand" "%x")
8659 (parallel [(const_int 0)
8663 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8664 (parallel [(const_int 0)
8665 (const_int 2)])))))]
8668 "&& reload_completed"
8677 (parallel [(const_int 0)
8682 (parallel [(const_int 0)
8686 operands[3] = CONST0_RTX (V2DImode);
8688 [(set_attr "type" "ssemul")
8689 (set_attr "mode" "TI")])
8691 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: V8HI operands 1 and 2 are each narrowed by a selection whose
;; index list starts at 1; operand 3 and the result are V4SI.
8692 (define_insn "xop_pmacsswd"
8693 [(set (match_operand:V4SI 0 "register_operand" "=x")
8698 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8699 (parallel [(const_int 1)
8705 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8706 (parallel [(const_int 1)
8710 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8712 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8713 [(set_attr "type" "ssemuladd")
8714 (set_attr "mode" "TI")])
;; vpmacswd: V8HI operands 1 and 2 are each narrowed by a selection whose
;; index list starts at 1; operand 3 and the result are V4SI.
8716 (define_insn "xop_pmacswd"
8717 [(set (match_operand:V4SI 0 "register_operand" "=x")
8722 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8723 (parallel [(const_int 1)
8729 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8730 (parallel [(const_int 1)
8734 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8736 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8737 [(set_attr "type" "ssemuladd")
8738 (set_attr "mode" "TI")])
;; vpmadcsswd: the RTL uses selections starting at index 0 and at index 1
;; of the V8HI inputs (operands 1 and 2); operand 3 and the result are
;; V4SI.
8740 (define_insn "xop_pmadcsswd"
8741 [(set (match_operand:V4SI 0 "register_operand" "=x")
8747 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8748 (parallel [(const_int 0)
8754 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8755 (parallel [(const_int 0)
8763 (parallel [(const_int 1)
8770 (parallel [(const_int 1)
8774 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8776 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8777 [(set_attr "type" "ssemuladd")
8778 (set_attr "mode" "TI")])
;; vpmadcswd: the RTL uses selections starting at index 0 and at index 1
;; of the V8HI inputs (operands 1 and 2); operand 3 and the result are
;; V4SI.
8780 (define_insn "xop_pmadcswd"
8781 [(set (match_operand:V4SI 0 "register_operand" "=x")
8787 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8788 (parallel [(const_int 0)
8794 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8795 (parallel [(const_int 0)
8803 (parallel [(const_int 1)
8810 (parallel [(const_int 1)
8814 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8816 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8817 [(set_attr "type" "ssemuladd")
8818 (set_attr "mode" "TI")])
8820 ;; XOP parallel XMM conditional moves
;; vpcmov over the V mode iterator.  The RTL lists operand 3 first, then
;; operands 1 and 2.  Two alternatives: either operand 3 or operand 2 may
;; be in memory, not both.  Only the "type" attribute is set here.
8821 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
8822 [(set (match_operand:V 0 "register_operand" "=x,x")
8824 (match_operand:V 3 "nonimmediate_operand" "x,m")
8825 (match_operand:V 1 "vector_move_operand" "x,x")
8826 (match_operand:V 2 "vector_move_operand" "xm,x")))]
8828 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8829 [(set_attr "type" "sse4arg")])
8831 ;; XOP horizontal add/subtract instructions
;; vphaddbw: V16QI source (register or memory), V8HI destination.  The RTL
;; uses two element selections of operand 1, starting at indices 0 and 1.
8832 (define_insn "xop_phaddbw"
8833 [(set (match_operand:V8HI 0 "register_operand" "=x")
8837 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8838 (parallel [(const_int 0)
8849 (parallel [(const_int 1)
8856 (const_int 15)])))))]
8858 "vphaddbw\t{%1, %0|%0, %1}"
8859 [(set_attr "type" "sseiadd1")])
;; vphaddbd: V16QI source (register or memory), V4SI destination.  The RTL
;; uses four element selections of operand 1, starting at indices 0..3.
8861 (define_insn "xop_phaddbd"
8862 [(set (match_operand:V4SI 0 "register_operand" "=x")
8867 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8868 (parallel [(const_int 0)
8875 (parallel [(const_int 1)
8883 (parallel [(const_int 2)
8890 (parallel [(const_int 3)
8893 (const_int 15)]))))))]
8895 "vphaddbd\t{%1, %0|%0, %1}"
8896 [(set_attr "type" "sseiadd1")])
;; vphaddbq: V16QI source (register or memory), V2DI destination.  The RTL
;; uses eight element selections of operand 1, starting at indices 0..3
;; and 8..11.
8898 (define_insn "xop_phaddbq"
8899 [(set (match_operand:V2DI 0 "register_operand" "=x")
8905 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8906 (parallel [(const_int 0)
8911 (parallel [(const_int 1)
8917 (parallel [(const_int 2)
8922 (parallel [(const_int 3)
8929 (parallel [(const_int 8)
8934 (parallel [(const_int 9)
8940 (parallel [(const_int 10)
8945 (parallel [(const_int 11)
8946 (const_int 15)])))))))]
8948 "vphaddbq\t{%1, %0|%0, %1}"
8949 [(set_attr "type" "sseiadd1")])
;; vphaddwd: V8HI source (register or memory), V4SI destination.  The RTL
;; uses two element selections of operand 1, starting at indices 0 and 1.
8951 (define_insn "xop_phaddwd"
8952 [(set (match_operand:V4SI 0 "register_operand" "=x")
8956 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8957 (parallel [(const_int 0)
8964 (parallel [(const_int 1)
8967 (const_int 7)])))))]
8969 "vphaddwd\t{%1, %0|%0, %1}"
8970 [(set_attr "type" "sseiadd1")])
;; vphaddwq: V8HI source (register or memory), V2DI destination.  The RTL
;; uses four element selections of operand 1, starting at indices 0..3.
8972 (define_insn "xop_phaddwq"
8973 [(set (match_operand:V2DI 0 "register_operand" "=x")
8978 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8979 (parallel [(const_int 0)
8984 (parallel [(const_int 1)
8990 (parallel [(const_int 2)
8995 (parallel [(const_int 3)
8996 (const_int 7)]))))))]
8998 "vphaddwq\t{%1, %0|%0, %1}"
8999 [(set_attr "type" "sseiadd1")])
;; vphadddq: V4SI source (register or memory), V2DI destination.  The RTL
;; uses two element selections of operand 1, starting at indices 0 and 1.
9001 (define_insn "xop_phadddq"
9002 [(set (match_operand:V2DI 0 "register_operand" "=x")
9006 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9007 (parallel [(const_int 0)
9012 (parallel [(const_int 1)
9013 (const_int 3)])))))]
9015 "vphadddq\t{%1, %0|%0, %1}"
9016 [(set_attr "type" "sseiadd1")])
;; vphaddubw: V16QI source (register or memory), V8HI destination.  The RTL
;; uses two element selections of operand 1, starting at indices 0 and 1.
9018 (define_insn "xop_phaddubw"
9019 [(set (match_operand:V8HI 0 "register_operand" "=x")
9023 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9024 (parallel [(const_int 0)
9035 (parallel [(const_int 1)
9042 (const_int 15)])))))]
9044 "vphaddubw\t{%1, %0|%0, %1}"
9045 [(set_attr "type" "sseiadd1")])
;; vphaddubd: V16QI source (register or memory), V4SI destination.  The RTL
;; uses four element selections of operand 1, starting at indices 0..3.
9047 (define_insn "xop_phaddubd"
9048 [(set (match_operand:V4SI 0 "register_operand" "=x")
9053 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9054 (parallel [(const_int 0)
9061 (parallel [(const_int 1)
9069 (parallel [(const_int 2)
9076 (parallel [(const_int 3)
9079 (const_int 15)]))))))]
9081 "vphaddubd\t{%1, %0|%0, %1}"
9082 [(set_attr "type" "sseiadd1")])
;; vphaddubq: V16QI source (register or memory), V2DI destination.  The RTL
;; uses eight element selections of operand 1, starting at indices 0..3
;; and 8..11.
9084 (define_insn "xop_phaddubq"
9085 [(set (match_operand:V2DI 0 "register_operand" "=x")
9091 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9092 (parallel [(const_int 0)
9097 (parallel [(const_int 1)
9103 (parallel [(const_int 2)
9108 (parallel [(const_int 3)
9115 (parallel [(const_int 8)
9120 (parallel [(const_int 9)
9126 (parallel [(const_int 10)
9131 (parallel [(const_int 11)
9132 (const_int 15)])))))))]
9134 "vphaddubq\t{%1, %0|%0, %1}"
9135 [(set_attr "type" "sseiadd1")])
;; XOP horizontal add, emitted as "vphadduwd".  Operand 1 is the V8HI
;; source (register or memory); operand 0 is the V4SI destination register.
9137 (define_insn "xop_phadduwd"
9138 [(set (match_operand:V4SI 0 "register_operand" "=x")
9142 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9143 (parallel [(const_int 0)
9150 (parallel [(const_int 1)
9153 (const_int 7)])))))]
9155 "vphadduwd\t{%1, %0|%0, %1}"
9156 [(set_attr "type" "sseiadd1")])
;; XOP horizontal add, emitted as "vphadduwq".  Operand 1 is the V8HI
;; source (register or memory); operand 0 is the V2DI destination register.
9158 (define_insn "xop_phadduwq"
9159 [(set (match_operand:V2DI 0 "register_operand" "=x")
9164 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9165 (parallel [(const_int 0)
9170 (parallel [(const_int 1)
9176 (parallel [(const_int 2)
9181 (parallel [(const_int 3)
9182 (const_int 7)]))))))]
9184 "vphadduwq\t{%1, %0|%0, %1}"
9185 [(set_attr "type" "sseiadd1")])
;; XOP horizontal add, emitted as "vphaddudq".  Operand 1 is the V4SI
;; source (register or memory); operand 0 is the V2DI destination register.
9187 (define_insn "xop_phaddudq"
9188 [(set (match_operand:V2DI 0 "register_operand" "=x")
9192 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9193 (parallel [(const_int 0)
9198 (parallel [(const_int 1)
9199 (const_int 3)])))))]
9201 "vphaddudq\t{%1, %0|%0, %1}"
9202 [(set_attr "type" "sseiadd1")])
;; XOP horizontal subtract, emitted as "vphsubbw".  Operand 1 is the V16QI
;; source (register or memory); operand 0 is the V8HI destination register.
9204 (define_insn "xop_phsubbw"
9205 [(set (match_operand:V8HI 0 "register_operand" "=x")
9209 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9210 (parallel [(const_int 0)
9221 (parallel [(const_int 1)
9228 (const_int 15)])))))]
9230 "vphsubbw\t{%1, %0|%0, %1}"
9231 [(set_attr "type" "sseiadd1")])
;; XOP horizontal subtract, emitted as "vphsubwd".  Operand 1 is the V8HI
;; source (register or memory); operand 0 is the V4SI destination register.
9233 (define_insn "xop_phsubwd"
9234 [(set (match_operand:V4SI 0 "register_operand" "=x")
9238 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9239 (parallel [(const_int 0)
9246 (parallel [(const_int 1)
9249 (const_int 7)])))))]
9251 "vphsubwd\t{%1, %0|%0, %1}"
9252 [(set_attr "type" "sseiadd1")])
;; XOP horizontal subtract, emitted as "vphsubdq".  Operand 1 is the V4SI
;; source (register or memory); operand 0 is the V2DI destination register.
9254 (define_insn "xop_phsubdq"
9255 [(set (match_operand:V2DI 0 "register_operand" "=x")
9259 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9260 (parallel [(const_int 0)
9265 (parallel [(const_int 1)
9266 (const_int 3)])))))]
9268 "vphsubdq\t{%1, %0|%0, %1}"
9269 [(set_attr "type" "sseiadd1")])
9271 ;; XOP permute instructions
;; Emits "vpperm" on three V16QI inputs, represented as
;; UNSPEC_XOP_PERMUTE.  The two alternatives let either operand 2 or
;; operand 3 live in memory; the insn condition rejects both being memory.
9272 (define_insn "xop_pperm"
9273 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9275 [(match_operand:V16QI 1 "register_operand" "x,x")
9276 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9277 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
9278 UNSPEC_XOP_PERMUTE))]
9279 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9280 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9281 [(set_attr "type" "sse4arg")
9282 (set_attr "mode" "TI")])
9284 ;; XOP pack instructions that combine two vectors into a smaller vector
;; "vpperm" used as a pack: operands 1 and 2 are the V2DI inputs and
;; operand 0 receives the V4SI result.  Operand 3 (V16QI) is only referenced
;; through a (use ...).  Operands 2 and 3 may not both be memory.
9285 (define_insn "xop_pperm_pack_v2di_v4si"
9286 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9289 (match_operand:V2DI 1 "register_operand" "x,x"))
9291 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
9292 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9293 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9294 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9295 [(set_attr "type" "sse4arg")
9296 (set_attr "mode" "TI")])
;; "vpperm" used as a pack: operands 1 and 2 are the V4SI inputs and
;; operand 0 receives the V8HI result.  Operand 3 (V16QI) is only referenced
;; through a (use ...).  Operands 2 and 3 may not both be memory.
9298 (define_insn "xop_pperm_pack_v4si_v8hi"
9299 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9302 (match_operand:V4SI 1 "register_operand" "x,x"))
9304 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
9305 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9306 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9307 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9308 [(set_attr "type" "sse4arg")
9309 (set_attr "mode" "TI")])
;; "vpperm" used as a pack: operands 1 and 2 are the V8HI inputs and
;; operand 0 receives the V16QI result.  Operand 3 (V16QI) is only
;; referenced through a (use ...).  Operands 2 and 3 may not both be memory.
9311 (define_insn "xop_pperm_pack_v8hi_v16qi"
9312 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9315 (match_operand:V8HI 1 "register_operand" "x,x"))
9317 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
9318 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9319 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9320 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9321 [(set_attr "type" "sse4arg")
9322 (set_attr "mode" "TI")])
9324 ;; XOP packed rotate instructions
;; Expander for a vector rotate-left by an SImode count (operand 2).
;; When the count is not accepted by const_0_to_<sserotatemax>_operand, it
;; is converted to the element mode if necessary, replicated into every
;; element of a fresh vector via vec_init, and the variable-count
;; xop_vrotl<mode>3 pattern is emitted with that vector.
9325 (define_expand "rotl<mode>3"
9326 [(set (match_operand:VI_128 0 "register_operand" "")
9328 (match_operand:VI_128 1 "nonimmediate_operand" "")
9329 (match_operand:SI 2 "general_operand")))]
9332 /* If we were given a scalar, convert it to parallel */
9333 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
9335 rtvec vs = rtvec_alloc (<ssescalarnum>);
9336 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
9337 rtx reg = gen_reg_rtx (<MODE>mode);
9338 rtx op2 = operands[2];
9341 if (GET_MODE (op2) != <ssescalarmode>mode)
9343 op2 = gen_reg_rtx (<ssescalarmode>mode);
9344 convert_move (op2, operands[2], false);
9347 for (i = 0; i < <ssescalarnum>; i++)
9348 RTVEC_ELT (vs, i) = op2;
9350 emit_insn (gen_vec_init<mode> (reg, par));
9351 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a vector rotate-right by an SImode count (operand 2).
;; When the count is not accepted by const_0_to_<sserotatemax>_operand, it
;; is converted to the element mode if necessary and replicated into a
;; vector.  That vector is negated with neg<mode>2, and the negated counts
;; are passed to the variable-count xop_vrotl<mode>3 pattern.
9356 (define_expand "rotr<mode>3"
9357 [(set (match_operand:VI_128 0 "register_operand" "")
9359 (match_operand:VI_128 1 "nonimmediate_operand" "")
9360 (match_operand:SI 2 "general_operand")))]
9363 /* If we were given a scalar, convert it to parallel */
9364 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
9366 rtvec vs = rtvec_alloc (<ssescalarnum>);
9367 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
9368 rtx neg = gen_reg_rtx (<MODE>mode);
9369 rtx reg = gen_reg_rtx (<MODE>mode);
9370 rtx op2 = operands[2];
9373 if (GET_MODE (op2) != <ssescalarmode>mode)
9375 op2 = gen_reg_rtx (<ssescalarmode>mode);
9376 convert_move (op2, operands[2], false);
9379 for (i = 0; i < <ssescalarnum>; i++)
9380 RTVEC_ELT (vs, i) = op2;
9382 emit_insn (gen_vec_init<mode> (reg, par));
9383 emit_insn (gen_neg<mode>2 (neg, reg));
9384 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate-left by an immediate: emits vprot<ssemodesuffix> with the constant
;; count in operand 2 (restricted by const_0_to_<sserotatemax>_operand).
;; Operand 1 may be a register or memory.
9389 (define_insn "xop_rotl<mode>3"
9390 [(set (match_operand:VI_128 0 "register_operand" "=x")
9392 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
9393 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
9395 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9396 [(set_attr "type" "sseishft")
9397 (set_attr "length_immediate" "1")
9398 (set_attr "mode" "TI")])
;; Rotate-right by an immediate count N (operand 2).  The insn is emitted
;; as vprot<ssemodesuffix> with the count placed in operands[3], computed
;; as (element width in bits - N).  The width is taken from
;; GET_MODE_BITSIZE of the scalar element mode.  The previous expression,
;; <ssescalarnum> * 8, is the element *count* times 8; it only matches the
;; element width for V4SI and produced e.g. 16 - N instead of 64 - N for
;; V2DI.
9400 (define_insn "xop_rotr<mode>3"
9401 [(set (match_operand:VI_128 0 "register_operand" "=x")
9403 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
9404 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
9407 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
9408 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
9410 [(set_attr "type" "sseishft")
9411 (set_attr "length_immediate" "1")
9412 (set_attr "mode" "TI")])
;; Rotate-right with a per-element count vector (operand 2): the counts are
;; negated into a fresh register and passed to xop_vrotl<mode>3.
9414 (define_expand "vrotr<mode>3"
9415 [(match_operand:VI_128 0 "register_operand" "")
9416 (match_operand:VI_128 1 "register_operand" "")
9417 (match_operand:VI_128 2 "register_operand" "")]
9420 rtx reg = gen_reg_rtx (<MODE>mode);
9421 emit_insn (gen_neg<mode>2 (reg, operands[2]));
9422 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-left with a per-element count vector (operand 2): forwards all
;; three operands unchanged to xop_vrotl<mode>3.
9426 (define_expand "vrotl<mode>3"
9427 [(match_operand:VI_128 0 "register_operand" "")
9428 (match_operand:VI_128 1 "register_operand" "")
9429 (match_operand:VI_128 2 "register_operand" "")]
9432 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable per-element rotate, emitted as vprot<ssemodesuffix>.  The RTL is
;; an if_then_else involving operand 2, one arm of which uses
;; (neg (match_dup 2)).  Either operand 1 or operand 2 may be in memory,
;; but the insn condition rejects both being memory at once.
9436 (define_insn "xop_vrotl<mode>3"
9437 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
9438 (if_then_else:VI_128
9440 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
9443 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
9447 (neg:VI_128 (match_dup 2)))))]
9448 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9449 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9450 [(set_attr "type" "sseishft")
9451 (set_attr "prefix_data16" "0")
9452 (set_attr "prefix_extra" "2")
9453 (set_attr "mode" "TI")])
9455 ;; XOP packed shift instructions.
9456 ;; FIXME: add V2DI back in
;; Logical shift right with a per-element count vector (operand 2): the
;; counts are negated into a fresh register and passed to xop_lshl<mode>3.
9457 (define_expand "vlshr<mode>3"
9458 [(match_operand:VI124_128 0 "register_operand" "")
9459 (match_operand:VI124_128 1 "register_operand" "")
9460 (match_operand:VI124_128 2 "register_operand" "")]
9463 rtx neg = gen_reg_rtx (<MODE>mode);
9464 emit_insn (gen_neg<mode>2 (neg, operands[2]));
9465 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Arithmetic shift right with a per-element count vector (operand 2): the
;; counts are negated into a fresh register and passed to xop_ashl<mode>3.
9469 (define_expand "vashr<mode>3"
9470 [(match_operand:VI124_128 0 "register_operand" "")
9471 (match_operand:VI124_128 1 "register_operand" "")
9472 (match_operand:VI124_128 2 "register_operand" "")]
9475 rtx neg = gen_reg_rtx (<MODE>mode);
9476 emit_insn (gen_neg<mode>2 (neg, operands[2]));
9477 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Shift left with a per-element count vector (operand 2): forwards all
;; three operands unchanged to xop_ashl<mode>3.
9481 (define_expand "vashl<mode>3"
9482 [(match_operand:VI124_128 0 "register_operand" "")
9483 (match_operand:VI124_128 1 "register_operand" "")
9484 (match_operand:VI124_128 2 "register_operand" "")]
9487 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable per-element arithmetic shift, emitted as vpsha<ssemodesuffix>.
;; The RTL is an if_then_else involving operand 2, one arm of which uses
;; (neg (match_dup 2)).  Either operand 1 or operand 2 may be in memory,
;; but not both.
9491 (define_insn "xop_ashl<mode>3"
9492 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
9493 (if_then_else:VI_128
9495 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
9498 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
9502 (neg:VI_128 (match_dup 2)))))]
9503 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9504 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9505 [(set_attr "type" "sseishft")
9506 (set_attr "prefix_data16" "0")
9507 (set_attr "prefix_extra" "2")
9508 (set_attr "mode" "TI")])
;; Variable per-element logical shift, emitted as vpshl<ssemodesuffix>.
;; The RTL is an if_then_else involving operand 2, one arm of which uses
;; (neg (match_dup 2)).  Either operand 1 or operand 2 may be in memory,
;; but not both.
9510 (define_insn "xop_lshl<mode>3"
9511 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
9512 (if_then_else:VI_128
9514 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
9517 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
9521 (neg:VI_128 (match_dup 2)))))]
9522 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9523 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9524 [(set_attr "type" "sseishft")
9525 (set_attr "prefix_data16" "0")
9526 (set_attr "prefix_extra" "2")
9527 (set_attr "mode" "TI")])
9529 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift left by a scalar count (operand 2, SImode, register or
;; constant).  The count is placed in all 16 elements of a PARALLEL, a
;; V16QI register is initialised from it with vec_initv16qi, and
;; xop_ashlv16qi3 is emitted with that count vector.
9530 (define_expand "ashlv16qi3"
9531 [(match_operand:V16QI 0 "register_operand" "")
9532 (match_operand:V16QI 1 "register_operand" "")
9533 (match_operand:SI 2 "nonmemory_operand" "")]
9536 rtvec vs = rtvec_alloc (16);
9537 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
9538 rtx reg = gen_reg_rtx (V16QImode);
9540 for (i = 0; i < 16; i++)
9541 RTVEC_ELT (vs, i) = operands[2];
9543 emit_insn (gen_vec_initv16qi (reg, par));
9544 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI logical shift by a scalar count (operand 2, SImode, register or
;; constant).  The count is placed in all 16 elements of a PARALLEL, a
;; V16QI register is initialised from it with vec_initv16qi, and
;; xop_lshlv16qi3 is emitted with that count vector.
9548 (define_expand "lshlv16qi3"
9549 [(match_operand:V16QI 0 "register_operand" "")
9550 (match_operand:V16QI 1 "register_operand" "")
9551 (match_operand:SI 2 "nonmemory_operand" "")]
9554 rtvec vs = rtvec_alloc (16);
9555 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
9556 rtx reg = gen_reg_rtx (V16QImode);
9558 for (i = 0; i < 16; i++)
9559 RTVEC_ELT (vs, i) = operands[2];
9561 emit_insn (gen_vec_initv16qi (reg, par));
9562 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar count (operand 2), implemented
;; through xop_ashlv16qi3 with a negated count vector.  A CONST_INT count
;; is negated at expand time before being replicated.  Otherwise the
;; replicated vector is negated with negv16qi2 and the negated vector is
;; used as the count.
9566 (define_expand "ashrv16qi3"
9567 [(match_operand:V16QI 0 "register_operand" "")
9568 (match_operand:V16QI 1 "register_operand" "")
9569 (match_operand:SI 2 "nonmemory_operand" "")]
9572 rtvec vs = rtvec_alloc (16);
9573 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
9574 rtx reg = gen_reg_rtx (V16QImode);
9576 rtx ele = ((CONST_INT_P (operands[2]))
9577 ? GEN_INT (- INTVAL (operands[2]))
9580 for (i = 0; i < 16; i++)
9581 RTVEC_ELT (vs, i) = ele;
9583 emit_insn (gen_vec_initv16qi (reg, par));
9585 if (!CONST_INT_P (operands[2]))
9587 rtx neg = gen_reg_rtx (V16QImode);
9588 emit_insn (gen_negv16qi2 (neg, reg));
9589 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
9592 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar count (operand 2), implemented
;; through xop_ashlv2di3 with a negated count.  A CONST_INT count is negated
;; at expand time; a count whose mode is not DImode is first converted to
;; DImode and then negated with negdi2; otherwise it is negated directly
;; with negdi2.  The negated scalar fills both elements of the count vector.
9597 (define_expand "ashrv2di3"
9598 [(match_operand:V2DI 0 "register_operand" "")
9599 (match_operand:V2DI 1 "register_operand" "")
9600 (match_operand:DI 2 "nonmemory_operand" "")]
9603 rtvec vs = rtvec_alloc (2);
9604 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
9605 rtx reg = gen_reg_rtx (V2DImode);
9608 if (CONST_INT_P (operands[2]))
9609 ele = GEN_INT (- INTVAL (operands[2]));
9610 else if (GET_MODE (operands[2]) != DImode)
9612 rtx move = gen_reg_rtx (DImode);
9613 ele = gen_reg_rtx (DImode);
9614 convert_move (move, operands[2], false);
9615 emit_insn (gen_negdi2 (ele, move));
9619 ele = gen_reg_rtx (DImode);
9620 emit_insn (gen_negdi2 (ele, operands[2]));
9623 RTVEC_ELT (vs, 0) = ele;
9624 RTVEC_ELT (vs, 1) = ele;
9625 emit_insn (gen_vec_initv2di (reg, par));
9626 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
;; Emits vfrcz<ssemodesuffix> for each FMAMODE mode: operand 1 (register or
;; memory) is the source, operand 0 the destination register.
9631 (define_insn "xop_frcz<mode>2"
9632 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
9634 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
9637 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
9638 [(set_attr "type" "ssecvt1")
9639 (set_attr "mode" "<MODE>")])
;; Expander for the VF_128 vfrcz form; its preparation code sets
;; operands[3] to the all-zero constant of the vector mode.
9642 (define_expand "xop_vmfrcz<mode>2"
9643 [(set (match_operand:VF_128 0 "register_operand")
9646 [(match_operand:VF_128 1 "nonimmediate_operand")]
9652 operands[3] = CONST0_RTX (<MODE>mode);
;; Matcher for the VF_128 vfrcz form: emits vfrcz<ssescalarmodesuffix> on
;; operand 1.  Operand 2 must satisfy const0_operand.
9655 (define_insn "*xop_vmfrcz_<mode>"
9656 [(set (match_operand:VF_128 0 "register_operand" "=x")
9659 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
9661 (match_operand:VF_128 2 "const0_operand")
9664 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9665 [(set_attr "type" "ssecvt1")
9666 (set_attr "mode" "<MODE>")])
;; Integer vector compare, emitted as vpcom%Y1<ssemodesuffix>.  Operand 1 is
;; the comparison operator (ix86_comparison_int_operator) applied to
;; operand 2 (register) and operand 3 (register or memory); %Y1 prints
;; the operator into the mnemonic.
9668 (define_insn "xop_maskcmp<mode>3"
9669 [(set (match_operand:VI_128 0 "register_operand" "=x")
9670 (match_operator:VI_128 1 "ix86_comparison_int_operator"
9671 [(match_operand:VI_128 2 "register_operand" "x")
9672 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
9674 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9675 [(set_attr "type" "sse4arg")
9676 (set_attr "prefix_data16" "0")
9677 (set_attr "prefix_rep" "0")
9678 (set_attr "prefix_extra" "2")
9679 (set_attr "length_immediate" "1")
9680 (set_attr "mode" "TI")])
;; Unsigned integer vector compare, emitted as vpcom%Y1u<ssemodesuffix>.
;; Operand 1 is the comparison operator (ix86_comparison_uns_operator)
;; applied to operand 2 (register) and operand 3 (register or memory).
9682 (define_insn "xop_maskcmp_uns<mode>3"
9683 [(set (match_operand:VI_128 0 "register_operand" "=x")
9684 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
9685 [(match_operand:VI_128 2 "register_operand" "x")
9686 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
9688 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9689 [(set_attr "type" "ssecmp")
9690 (set_attr "prefix_data16" "0")
9691 (set_attr "prefix_rep" "0")
9692 (set_attr "prefix_extra" "2")
9693 (set_attr "length_immediate" "1")
9694 (set_attr "mode" "TI")])
9696 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
9697 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
9698 ;; the exact instruction generated for the intrinsic.
;; Same output template as xop_maskcmp_uns<mode>3, but the comparison is
;; wrapped in UNSPEC_XOP_UNSIGNED_CMP rather than appearing as a bare
;; comparison operator.
9699 (define_insn "xop_maskcmp_uns2<mode>3"
9700 [(set (match_operand:VI_128 0 "register_operand" "=x")
9702 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
9703 [(match_operand:VI_128 2 "register_operand" "x")
9704 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
9705 UNSPEC_XOP_UNSIGNED_CMP))]
9707 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9708 [(set_attr "type" "ssecmp")
9709 (set_attr "prefix_data16" "0")
9710 (set_attr "prefix_extra" "2")
9711 (set_attr "length_immediate" "1")
9712 (set_attr "mode" "TI")])
9714 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
9715 ;; being added here to be complete.
;; Emits vpcomtrue<ssemodesuffix> when the constant in operand 3 is nonzero
;; and vpcomfalse<ssemodesuffix> when it is zero.  Operands 1 and 2 are the
;; vector inputs; the operation is represented as UNSPEC_XOP_TRUEFALSE.
9716 (define_insn "xop_pcom_tf<mode>3"
9717 [(set (match_operand:VI_128 0 "register_operand" "=x")
9719 [(match_operand:VI_128 1 "register_operand" "x")
9720 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
9721 (match_operand:SI 3 "const_int_operand" "n")]
9722 UNSPEC_XOP_TRUEFALSE))]
9725 return ((INTVAL (operands[3]) != 0)
9726 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9727 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
9729 [(set_attr "type" "ssecmp")
9730 (set_attr "prefix_data16" "0")
9731 (set_attr "prefix_extra" "2")
9732 (set_attr "length_immediate" "1")
9733 (set_attr "mode" "TI")])
;; Emits vpermil2<ssemodesuffix> with two VF inputs (operands 1 and 2), an
;; integer-vector selector (operand 3, register or memory) and a 0..3
;; immediate (operand 4).
;; NOTE(review): operand 2 carries the "%" (commutative-with-next-operand)
;; constraint modifier, yet operand 3 has a different mode
;; (<sseintvecmode>).  Confirm that the "%" is intended.
9735 (define_insn "xop_vpermil2<mode>3"
9736 [(set (match_operand:VF 0 "register_operand" "=x")
9738 [(match_operand:VF 1 "register_operand" "x")
9739 (match_operand:VF 2 "nonimmediate_operand" "%x")
9740 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
9741 (match_operand:SI 4 "const_0_to_3_operand" "n")]
9744 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
9745 [(set_attr "type" "sse4arg")
9746 (set_attr "length_immediate" "1")
9747 (set_attr "mode" "<MODE>")])
9749 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AES encrypt round on V2DI operands.  Two alternatives, selected by the
;; "isa" attribute: the non-AVX two-operand "aesenc" form, where operand 1
;; is tied to the destination (constraint "0"), and the AVX three-operand
;; "vaesenc" form.
9751 (define_insn "aesenc"
9752 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9753 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9754 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9758 aesenc\t{%2, %0|%0, %2}
9759 vaesenc\t{%2, %1, %0|%0, %1, %2}"
9760 [(set_attr "isa" "noavx,avx")
9761 (set_attr "type" "sselog1")
9762 (set_attr "prefix_extra" "1")
9763 (set_attr "prefix" "orig,vex")
9764 (set_attr "mode" "TI")])
;; AES last encrypt round (UNSPEC_AESENCLAST) on V2DI operands.  Two
;; alternatives, selected by the "isa" attribute: non-AVX two-operand
;; "aesenclast" with operand 1 tied to the destination, and AVX
;; three-operand "vaesenclast".
9766 (define_insn "aesenclast"
9767 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9768 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9769 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9770 UNSPEC_AESENCLAST))]
9773 aesenclast\t{%2, %0|%0, %2}
9774 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
9775 [(set_attr "isa" "noavx,avx")
9776 (set_attr "type" "sselog1")
9777 (set_attr "prefix_extra" "1")
9778 (set_attr "prefix" "orig,vex")
9779 (set_attr "mode" "TI")])
;; AES decrypt round on V2DI operands.  Two alternatives, selected by the
;; "isa" attribute: non-AVX two-operand "aesdec" with operand 1 tied to the
;; destination, and AVX three-operand "vaesdec".
9781 (define_insn "aesdec"
9782 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9783 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9784 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9788 aesdec\t{%2, %0|%0, %2}
9789 vaesdec\t{%2, %1, %0|%0, %1, %2}"
9790 [(set_attr "isa" "noavx,avx")
9791 (set_attr "type" "sselog1")
9792 (set_attr "prefix_extra" "1")
9793 (set_attr "prefix" "orig,vex")
9794 (set_attr "mode" "TI")])
;; AES last decrypt round (UNSPEC_AESDECLAST) on V2DI operands.  Two
;; alternatives, selected by the "isa" attribute: non-AVX two-operand
;; "aesdeclast" with operand 1 tied to the destination, and AVX
;; three-operand "vaesdeclast".
9796 (define_insn "aesdeclast"
9797 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9798 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9799 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9800 UNSPEC_AESDECLAST))]
9803 aesdeclast\t{%2, %0|%0, %2}
9804 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
9805 [(set_attr "isa" "noavx,avx")
9806 (set_attr "type" "sselog1")
9807 (set_attr "prefix_extra" "1")
9808 (set_attr "prefix" "orig,vex")
9809 (set_attr "mode" "TI")])
;; Single-source AES instruction "aesimc" on a V2DI operand (register or
;; memory).  The "%v" in the template together with prefix "maybe_vex"
;; lets one pattern serve both the legacy and the VEX encoding.
9811 (define_insn "aesimc"
9812 [(set (match_operand:V2DI 0 "register_operand" "=x")
9813 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9816 "%vaesimc\t{%1, %0|%0, %1}"
9817 [(set_attr "type" "sselog1")
9818 (set_attr "prefix_extra" "1")
9819 (set_attr "prefix" "maybe_vex")
9820 (set_attr "mode" "TI")])
;; "aeskeygenassist" (UNSPEC_AESKEYGENASSIST): V2DI source in operand 1
;; (register or memory) plus an 8-bit immediate in operand 2.  The "%v"
;; template prefix and prefix "maybe_vex" cover both encodings.
9822 (define_insn "aeskeygenassist"
9823 [(set (match_operand:V2DI 0 "register_operand" "=x")
9824 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
9825 (match_operand:SI 2 "const_0_to_255_operand" "n")]
9826 UNSPEC_AESKEYGENASSIST))]
9828 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
9829 [(set_attr "type" "sselog1")
9830 (set_attr "prefix_extra" "1")
9831 (set_attr "length_immediate" "1")
9832 (set_attr "prefix" "maybe_vex")
9833 (set_attr "mode" "TI")])
;; "pclmulqdq" on V2DI operands with an 8-bit immediate selector
;; (operand 3).  Two alternatives, selected by the "isa" attribute: non-AVX
;; two-operand form with operand 1 tied to the destination, and AVX
;; three-operand "vpclmulqdq".
9835 (define_insn "pclmulqdq"
9836 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9837 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9838 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
9839 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9843 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
9844 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9845 [(set_attr "isa" "noavx,avx")
9846 (set_attr "type" "sselog1")
9847 (set_attr "prefix_extra" "1")
9848 (set_attr "length_immediate" "1")
9849 (set_attr "prefix" "orig,vex")
9850 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL for vzeroall by hand.  Element 0 is an
;; UNSPEC_VOLATILE; each following element is a SET of one SSE register (in
;; V8SImode) to zero.  The register count is 16 when TARGET_64BIT and 8
;; otherwise, so the PARALLEL has nregs + 1 elements.
9852 (define_expand "avx_vzeroall"
9853 [(match_par_dup 0 [(const_int 0)])]
9856 int nregs = TARGET_64BIT ? 16 : 8;
9859 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
9861 XVECEXP (operands[0], 0, 0)
9862 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
9865 for (regno = 0; regno < nregs; regno++)
9866 XVECEXP (operands[0], 0, regno + 1)
9867 = gen_rtx_SET (VOIDmode,
9868 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
9869 CONST0_RTX (V8SImode));
;; Matcher for a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile.  Attributes mark
;; it as having no modrm byte and no memory access.
9872 (define_insn "*avx_vzeroall"
9873 [(match_parallel 0 "vzeroall_operation"
9874 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
9877 [(set_attr "type" "sse")
9878 (set_attr "modrm" "0")
9879 (set_attr "memory" "none")
9880 (set_attr "prefix" "vex")
9881 (set_attr "mode" "OI")])
9883 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
9884 ;; if the upper 128bits are unused.
;; UNSPECV_VZEROUPPER unspec_volatile carrying one constant integer operand.
;; Attributes mark it as having no modrm byte and no memory access.
9885 (define_insn "avx_vzeroupper"
9886 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
9887 UNSPECV_VZEROUPPER)]
9890 [(set_attr "type" "sse")
9891 (set_attr "modrm" "0")
9892 (set_attr "memory" "none")
9893 (set_attr "prefix" "vex")
9894 (set_attr "mode" "OI")])
;; Broadcast of a scalar (operand 1) to every element of a 256-bit vector.
;; The memory alternative emits vbroadcast<ssescalarmodesuffix>; the
;; register alternative is discouraged ("?x").
;; NOTE(review): the template for the register alternative is not visible
;; here -- presumably it is split after reload; confirm.
9896 (define_insn "vec_dup<mode>"
9897 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
9898 (vec_duplicate:AVX256MODE24P
9899 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
9902 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
9904 [(set_attr "type" "ssemov")
9905 (set_attr "prefix_extra" "1")
9906 (set_attr "prefix" "vex")
9907 (set_attr "mode" "V8SF")])
;; After reload, a 256-bit vec_duplicate from a register is rewritten as two
;; sets.  The first duplicates the scalar into operand 2, which is the
;; destination's hard register viewed in the half-width vector mode.  The
;; second sets the full destination to the vec_concat of that half with
;; itself.
9910 [(set (match_operand:AVX256MODE24P 0 "register_operand" "")
9911 (vec_duplicate:AVX256MODE24P
9912 (match_operand:<ssescalarmode> 1 "register_operand" "")))]
9913 "TARGET_AVX && reload_completed"
9914 [(set (match_dup 2) (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
9915 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
9916 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Broadcast of a half-width (128-bit) vector into a 256-bit register.
;; Alternative 0 (memory source) emits vbroadcastf128; alternative 1 (source
;; tied to the destination) emits vinsertf128 with immediate 1;
;; alternative 2 (other register, discouraged) emits vperm2f128 with
;; immediate 0.
9918 (define_insn "avx_vbroadcastf128_<mode>"
9919 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
9921 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
9925 vbroadcastf128\t{%1, %0|%0, %1}
9926 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
9927 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
9928 [(set_attr "type" "ssemov,sselog1,sselog1")
9929 (set_attr "prefix_extra" "1")
9930 (set_attr "length_immediate" "0,1,1")
9931 (set_attr "prefix" "vex")
9932 (set_attr "mode" "V4SF,V8SF,V8SF")])
9934 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
9935 ;; If it so happens that the input is in memory, use vbroadcast.
9936 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast expressed as a selection accepted by
;; avx_vbroadcast_operand; operand 3 is the selected element index.  The
;; output code switches on the alternative.  For a memory source the
;; address is advanced by elt * 4 bytes to the chosen SFmode element and
;; vbroadcastss is emitted.  Otherwise vpermilps is emitted with immediate
;; elt * 0x55.
9937 (define_insn "*avx_vperm_broadcast_v4sf"
9938 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
9940 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
9941 (match_parallel 2 "avx_vbroadcast_operand"
9942 [(match_operand 3 "const_int_operand" "C,n,n")])))]
9945 int elt = INTVAL (operands[3]);
9946 switch (which_alternative)
9950 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
9951 return "vbroadcastss\t{%1, %0|%0, %1}";
9953 operands[2] = GEN_INT (elt * 0x55);
9954 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
9959 [(set_attr "type" "ssemov,ssemov,sselog1")
9960 (set_attr "prefix_extra" "1")
9961 (set_attr "length_immediate" "0,0,1")
9962 (set_attr "prefix" "vex")
9963 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit float broadcast expressed as a selection accepted by
;; avx_vbroadcast_operand; split after reload.  Two paths are visible.  One
;; emits avx_vpermil<mode> to replicate the element within its 128-bit
;; lane, then avx_vperm2f128<mode>3 to copy that lane into both halves.
;; The other rewrites operand 1 to address the selected scalar element so
;; that the replacement vec_duplicate pattern applies.
9965 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
9966 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
9968 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
9969 (match_parallel 2 "avx_vbroadcast_operand"
9970 [(match_operand 3 "const_int_operand" "C,n,n")])))]
9973 "&& reload_completed"
9974 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
9976 rtx op0 = operands[0], op1 = operands[1];
9977 int elt = INTVAL (operands[3]);
9983 /* Shuffle element we care about into all elements of the 128-bit lane.
9984 The other lane gets shuffled too, but we don't care. */
9985 if (<MODE>mode == V4DFmode)
9986 mask = (elt & 1 ? 15 : 0);
9988 mask = (elt & 3) * 0x55;
9989 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
9991 /* Shuffle the lane we care about into both lanes of the dest. */
9992 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
9993 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
9997 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
9998 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for the immediate form of vpermil on DFmode vectors (VF2).  The
;; 8-bit mask is decoded into an explicit selection PARALLEL, one bit per
;; element: bits 0 and 1 select within elements 0..1.  For V4DF, bits 2
;; and 3 select within elements 2..3 (hence the "+ 2").
10001 (define_expand "avx_vpermil<mode>"
10002 [(set (match_operand:VF2 0 "register_operand" "")
10004 (match_operand:VF2 1 "nonimmediate_operand" "")
10005 (match_operand:SI 2 "const_0_to_255_operand" "")))]
10008 int mask = INTVAL (operands[2]);
10009 rtx perm[<ssescalarnum>];
10011 perm[0] = GEN_INT (mask & 1);
10012 perm[1] = GEN_INT ((mask >> 1) & 1);
10013 if (<MODE>mode == V4DFmode)
10015 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
10016 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
10020 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for the immediate form of vpermil on SFmode vectors (VF1).  The
;; 8-bit mask is decoded into an explicit selection PARALLEL, two bits per
;; element, for elements 0..3.  For V8SF the same four 2-bit fields are
;; reused for elements 4..7, each offset by 4.
10023 (define_expand "avx_vpermil<mode>"
10024 [(set (match_operand:VF1 0 "register_operand" "")
10026 (match_operand:VF1 1 "nonimmediate_operand" "")
10027 (match_operand:SI 2 "const_0_to_255_operand" "")))]
10030 int mask = INTVAL (operands[2]);
10031 rtx perm[<ssescalarnum>];
10033 perm[0] = GEN_INT (mask & 3);
10034 perm[1] = GEN_INT ((mask >> 2) & 3);
10035 perm[2] = GEN_INT ((mask >> 4) & 3);
10036 perm[3] = GEN_INT ((mask >> 6) & 3);
10037 if (<MODE>mode == V8SFmode)
10039 perm[4] = GEN_INT ((mask & 3) + 4);
10040 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
10041 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
10042 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
10046 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matcher for a selection PARALLEL that avx_vpermilp_parallel accepts.
;; That helper returns the instruction immediate plus one (zero meaning "no
;; match"), so the output code subtracts 1 to recover the mask, then emits
;; vpermil<ssemodesuffix> with it.
10049 (define_insn "*avx_vpermilp<mode>"
10050 [(set (match_operand:VF 0 "register_operand" "=x")
10052 (match_operand:VF 1 "nonimmediate_operand" "xm")
10053 (match_parallel 2 ""
10054 [(match_operand 3 "const_int_operand" "")])))]
10056 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
10058 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
10059 operands[2] = GEN_INT (mask);
10060 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
10062 [(set_attr "type" "sselog")
10063 (set_attr "prefix_extra" "1")
10064 (set_attr "length_immediate" "1")
10065 (set_attr "prefix" "vex")
10066 (set_attr "mode" "<MODE>")])
;; Variable form of vpermil<ssemodesuffix>: operand 1 is the VF data
;; register and operand 2 the integer-vector selector (register or memory).
10068 (define_insn "avx_vpermilvar<mode>3"
10069 [(set (match_operand:VF 0 "register_operand" "=x")
10071 [(match_operand:VF 1 "register_operand" "x")
10072 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
10075 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10076 [(set_attr "type" "sselog")
10077 (set_attr "prefix_extra" "1")
10078 (set_attr "prefix" "vex")
10079 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128 with an 8-bit immediate (operand 3).  When
;; neither bit 3 nor bit 7 of the immediate is set ((mask & 0x88) == 0),
;; the operation is rewritten as a vec_select from the vec_concat of
;; operands 1 and 2.  Bits 0..1 choose the half that supplies the low
;; result elements and bits 4..5 the half that supplies the high ones.
;; Otherwise the UNSPEC_VPERMIL2F128 form in the pattern is kept.
10081 (define_expand "avx_vperm2f128<mode>3"
10082 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
10083 (unspec:AVX256MODE2P
10084 [(match_operand:AVX256MODE2P 1 "register_operand" "")
10085 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
10086 (match_operand:SI 3 "const_0_to_255_operand" "")]
10087 UNSPEC_VPERMIL2F128))]
10090 int mask = INTVAL (operands[3]);
10091 if ((mask & 0x88) == 0)
10093 rtx perm[<ssescalarnum>], t1, t2;
10094 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
10096 base = (mask & 3) * nelt2;
10097 for (i = 0; i < nelt2; ++i)
10098 perm[i] = GEN_INT (base + i);
10100 base = ((mask >> 4) & 3) * nelt2;
10101 for (i = 0; i < nelt2; ++i)
10102 perm[i + nelt2] = GEN_INT (base + i);
10104 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
10105 operands[1], operands[2]);
10106 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
10107 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
10108 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
10114 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
10115 ;; means that in order to represent this properly in rtl we'd have to
10116 ;; nest *another* vec_concat with a zero operand and do the select from
10117 ;; a 4x wide vector. That doesn't seem very nice.
;; UNSPEC_VPERMIL2F128 form of vperm2f128: accepts any 0..255 immediate
;; (operand 3) and passes it straight through to the instruction.
10118 (define_insn "*avx_vperm2f128<mode>_full"
10119 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
10120 (unspec:AVX256MODE2P
10121 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
10122 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
10123 (match_operand:SI 3 "const_0_to_255_operand" "n")]
10124 UNSPEC_VPERMIL2F128))]
10126 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10127 [(set_attr "type" "sselog")
10128 (set_attr "prefix_extra" "1")
10129 (set_attr "length_immediate" "1")
10130 (set_attr "prefix" "vex")
10131 (set_attr "mode" "V8SF")])
;; vec_select/vec_concat form of vperm2f128.  The selection PARALLEL must be
;; accepted by avx_vperm2f128_parallel, which returns the instruction
;; immediate plus one; the output code subtracts 1 to recover the mask
;; before emitting vperm2f128.
10133 (define_insn "*avx_vperm2f128<mode>_nozero"
10134 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
10135 (vec_select:AVX256MODE2P
10136 (vec_concat:<ssedoublevecmode>
10137 (match_operand:AVX256MODE2P 1 "register_operand" "x")
10138 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
10139 (match_parallel 3 ""
10140 [(match_operand 4 "const_int_operand" "")])))]
10142 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
10144 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
10145 operands[3] = GEN_INT (mask);
10146 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
10148 [(set_attr "type" "sselog")
10149 (set_attr "prefix_extra" "1")
10150 (set_attr "length_immediate" "1")
10151 (set_attr "prefix" "vex")
10152 (set_attr "mode" "V8SF")])
;; Expander for inserting a half-width vector (operand 2) into a 256-bit
;; vector (operand 1).  The 0/1 immediate in operand 3 selects between the
;; vec_set_lo_<mode> and vec_set_hi_<mode> generators; any other value is
;; treated as unreachable.
10154 (define_expand "avx_vinsertf128<mode>"
10155 [(match_operand:V_256 0 "register_operand" "")
10156 (match_operand:V_256 1 "register_operand" "")
10157 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
10158 (match_operand:SI 3 "const_0_to_1_operand" "")]
10161 rtx (*insn)(rtx, rtx, rtx);
10163 switch (INTVAL (operands[3]))
10166 insn = gen_vec_set_lo_<mode>;
10169 insn = gen_vec_set_hi_<mode>;
10172 gcc_unreachable ();
10175 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Replace the low half of a 4-element 256-bit vector (VI8F_256): the result
;; is operand 2 concatenated with elements 2..3 of operand 1.  Emitted as
;; vinsertf128 with immediate 0.
10179 (define_insn "vec_set_lo_<mode>"
10180 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
10181 (vec_concat:VI8F_256
10182 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
10183 (vec_select:<ssehalfvecmode>
10184 (match_operand:VI8F_256 1 "register_operand" "x")
10185 (parallel [(const_int 2) (const_int 3)]))))]
10187 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10188 [(set_attr "type" "sselog")
10189 (set_attr "prefix_extra" "1")
10190 (set_attr "length_immediate" "1")
10191 (set_attr "prefix" "vex")
10192 (set_attr "mode" "V8SF")])
;; Replace the high half of a 4-element 256-bit vector (VI8F_256): the
;; result is elements 0..1 of operand 1 concatenated with operand 2.
;; Emitted as vinsertf128 with immediate 1.
10194 (define_insn "vec_set_hi_<mode>"
10195 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
10196 (vec_concat:VI8F_256
10197 (vec_select:<ssehalfvecmode>
10198 (match_operand:VI8F_256 1 "register_operand" "x")
10199 (parallel [(const_int 0) (const_int 1)]))
10200 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
10202 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10203 [(set_attr "type" "sselog")
10204 (set_attr "prefix_extra" "1")
10205 (set_attr "length_immediate" "1")
10206 (set_attr "prefix" "vex")
10207 (set_attr "mode" "V8SF")])
;; Replace the low half of an 8-element 256-bit vector (VI4F_256): the
;; result is operand 2 concatenated with elements 4..7 of operand 1.
;; Emitted as vinsertf128 with immediate 0.
10209 (define_insn "vec_set_lo_<mode>"
10210 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
10211 (vec_concat:VI4F_256
10212 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
10213 (vec_select:<ssehalfvecmode>
10214 (match_operand:VI4F_256 1 "register_operand" "x")
10215 (parallel [(const_int 4) (const_int 5)
10216 (const_int 6) (const_int 7)]))))]
10218 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10219 [(set_attr "type" "sselog")
10220 (set_attr "prefix_extra" "1")
10221 (set_attr "length_immediate" "1")
10222 (set_attr "prefix" "vex")
10223 (set_attr "mode" "V8SF")])
;; Replace the high half of an 8-element 256-bit vector (VI4F_256): the
;; result is elements 0..3 of operand 1 concatenated with operand 2.
;; Emitted as vinsertf128 with immediate 1.
10225 (define_insn "vec_set_hi_<mode>"
10226 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
10227 (vec_concat:VI4F_256
10228 (vec_select:<ssehalfvecmode>
10229 (match_operand:VI4F_256 1 "register_operand" "x")
10230 (parallel [(const_int 0) (const_int 1)
10231 (const_int 2) (const_int 3)]))
10232 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
10234 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10235 [(set_attr "type" "sselog")
10236 (set_attr "prefix_extra" "1")
10237 (set_attr "length_immediate" "1")
10238 (set_attr "prefix" "vex")
10239 (set_attr "mode" "V8SF")])
;; V16HI pattern combining the V8HI operand 2 (register or memory) with
;; elements 8 through 15 of the V16HI operand 1, written to the V16HI
;; register operand 0.  Emitted as vinsertf128 with immediate 0x0.
;; NOTE(review): presumably the V16HI counterpart of vec_set_lo_<mode>
;; (operand 2 becomes the first half) -- confirm against the full pattern.
10241 (define_insn "vec_set_lo_v16hi"
10242 [(set (match_operand:V16HI 0 "register_operand" "=x")
10244 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10246 (match_operand:V16HI 1 "register_operand" "x")
10247 (parallel [(const_int 8) (const_int 9)
10248 (const_int 10) (const_int 11)
10249 (const_int 12) (const_int 13)
10250 (const_int 14) (const_int 15)]))))]
10252 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10253 [(set_attr "type" "sselog")
10254 (set_attr "prefix_extra" "1")
10255 (set_attr "length_immediate" "1")
10256 (set_attr "prefix" "vex")
10257 (set_attr "mode" "V8SF")])
;; V16HI pattern combining elements 0 through 7 of the V16HI operand 1
;; with the V8HI operand 2 (register or memory), written to the V16HI
;; register operand 0.  Emitted as vinsertf128 with immediate 0x1.
;; NOTE(review): presumably the V16HI counterpart of vec_set_hi_<mode>
;; (operand 2 becomes the second half) -- confirm against the full pattern.
10259 (define_insn "vec_set_hi_v16hi"
10260 [(set (match_operand:V16HI 0 "register_operand" "=x")
10263 (match_operand:V16HI 1 "register_operand" "x")
10264 (parallel [(const_int 0) (const_int 1)
10265 (const_int 2) (const_int 3)
10266 (const_int 4) (const_int 5)
10267 (const_int 6) (const_int 7)]))
10268 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
10270 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10271 [(set_attr "type" "sselog")
10272 (set_attr "prefix_extra" "1")
10273 (set_attr "length_immediate" "1")
10274 (set_attr "prefix" "vex")
10275 (set_attr "mode" "V8SF")])
;; V32QI pattern combining the V16QI operand 2 (register or memory) with
;; elements 16 through 31 of the V32QI operand 1, written to the V32QI
;; register operand 0.  Emitted as vinsertf128 with immediate 0x0.
;; NOTE(review): presumably the V32QI counterpart of vec_set_lo_<mode>
;; (operand 2 becomes the first half) -- confirm against the full pattern.
10277 (define_insn "vec_set_lo_v32qi"
10278 [(set (match_operand:V32QI 0 "register_operand" "=x")
10280 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
10282 (match_operand:V32QI 1 "register_operand" "x")
10283 (parallel [(const_int 16) (const_int 17)
10284 (const_int 18) (const_int 19)
10285 (const_int 20) (const_int 21)
10286 (const_int 22) (const_int 23)
10287 (const_int 24) (const_int 25)
10288 (const_int 26) (const_int 27)
10289 (const_int 28) (const_int 29)
10290 (const_int 30) (const_int 31)]))))]
10292 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10293 [(set_attr "type" "sselog")
10294 (set_attr "prefix_extra" "1")
10295 (set_attr "length_immediate" "1")
10296 (set_attr "prefix" "vex")
10297 (set_attr "mode" "V8SF")])
;; V32QI pattern combining elements 0 through 15 of the V32QI operand 1
;; with the V16QI operand 2 (register or memory), written to the V32QI
;; register operand 0.  Emitted as vinsertf128 with immediate 0x1.
;; NOTE(review): presumably the V32QI counterpart of vec_set_hi_<mode>
;; (operand 2 becomes the second half) -- confirm against the full pattern.
10299 (define_insn "vec_set_hi_v32qi"
10300 [(set (match_operand:V32QI 0 "register_operand" "=x")
10303 (match_operand:V32QI 1 "register_operand" "x")
10304 (parallel [(const_int 0) (const_int 1)
10305 (const_int 2) (const_int 3)
10306 (const_int 4) (const_int 5)
10307 (const_int 6) (const_int 7)
10308 (const_int 8) (const_int 9)
10309 (const_int 10) (const_int 11)
10310 (const_int 12) (const_int 13)
10311 (const_int 14) (const_int 15)]))
10312 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
10314 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10315 [(set_attr "type" "sselog")
10316 (set_attr "prefix_extra" "1")
10317 (set_attr "length_immediate" "1")
10318 (set_attr "prefix" "vex")
10319 (set_attr "mode" "V8SF")])
;; Named expander for each VF mode: sets the VF register operand 0 from
;; the VF memory operand 1 together with the <sseintvecmode> register
;; operand 2.
;; NOTE(review): operand 2 is presumably the load mask -- confirm against
;; the matching insn pattern.
10321 (define_expand "avx_maskload<ssemodesuffix><avxsizesuffix>"
10322 [(set (match_operand:VF 0 "register_operand" "")
10324 [(match_operand:<sseintvecmode> 2 "register_operand" "")
10325 (match_operand:VF 1 "memory_operand" "")
;; Named expander for each VF mode: sets the VF memory operand 0 from the
;; <sseintvecmode> register operand 1 together with the VF register
;; operand 2.
;; NOTE(review): operand 1 is presumably the store mask -- confirm against
;; the matching insn pattern.
10330 (define_expand "avx_maskstore<ssemodesuffix><avxsizesuffix>"
10331 [(set (match_operand:VF 0 "memory_operand" "")
10333 [(match_operand:<sseintvecmode> 1 "register_operand" "")
10334 (match_operand:VF 2 "register_operand" "")
;; Insn emitting vmaskmov<ssemodesuffix> for the VF modes.  It has two
;; alternatives: register destination with memory operand 2, and memory
;; destination with register operand 2; operand 1 is an <sseintvecmode>
;; register in both.  The condition additionally requires that operand 0
;; is a register exactly when operand 2 is memory, which rules out the
;; register-to-register and memory-to-memory combinations.
10339 (define_insn "*avx_maskmov<ssemodesuffix><avxsizesuffix>"
10340 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
10342 [(match_operand:<sseintvecmode> 1 "register_operand" "x,x")
10343 (match_operand:VF 2 "nonimmediate_operand" "m,x")
10347 && (REG_P (operands[0]) == MEM_P (operands[2]))"
10348 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10349 [(set_attr "type" "sselog1")
10350 (set_attr "prefix_extra" "1")
10351 (set_attr "prefix" "vex")
10352 (set_attr "mode" "<MODE>")])
;; Insn-and-split for the AVX256MODE2P modes: operand 0 (register or
;; memory) is set from an unspec wrapping the half-width operand 1.
;; Once reload has completed the insn is split: operand 1 is re-expressed
;; in <MODE>mode, either as a fresh REG rtx with the same register number
;; or through gen_lowpart, and a plain move into operand 0 is emitted.
;; NOTE(review): the test choosing between gen_rtx_REG and gen_lowpart is
;; not shown here; presumably it distinguishes register from memory
;; sources -- confirm.
10354 (define_insn_and_split "avx_<ssemodesuffix><avxsizesuffix>_<ssemodesuffix>"
10355 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
10356 (unspec:AVX256MODE2P
10357 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
10361 "&& reload_completed"
10364 rtx op1 = operands[1];
10366 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
10368 op1 = gen_lowpart (<MODE>mode, op1);
10369 emit_move_insn (operands[0], op1);
;; Named expander for the V_256 modes.  All of the work is delegated to
;; ix86_expand_vector_init, called with false as its first argument, the
;; register destination (operand 0) and the unconstrained operand 1.
10373 (define_expand "vec_init<mode>"
10374 [(match_operand:V_256 0 "register_operand" "")
10375 (match_operand 1 "" "")]
10378 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Builds a V_256 register (operand 0) from two half-width operands:
;; operand 1 is always a register, operand 2 is register/memory in the
;; first alternative and matches constraint "C" in the second.
;; The output code switches on which_alternative: one arm emits
;; vinsertf128 with immediate 0x1 (operand 1 printed with the %t
;; modifier), the other emits vmovaps, vmovapd or vmovdqa, chosen by the
;; insn's mode attribute, writing the destination printed with %x.
;; NOTE(review): the attribute lists (sselog,ssemov; prefix_extra and
;; length_immediate set only for the first alternative) suggest the
;; vinsertf128 arm is alternative 0 -- confirm against the case labels.
10382 (define_insn "*vec_concat<mode>_avx"
10383 [(set (match_operand:V_256 0 "register_operand" "=x,x")
10385 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
10386 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
10389 switch (which_alternative)
10392 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
10394 switch (get_attr_mode (insn))
10397 return "vmovaps\t{%1, %x0|%x0, %1}";
10399 return "vmovapd\t{%1, %x0|%x0, %1}";
10401 return "vmovdqa\t{%1, %x0|%x0, %1}";
10404 gcc_unreachable ();
10407 [(set_attr "type" "sselog,ssemov")
10408 (set_attr "prefix_extra" "1,*")
10409 (set_attr "length_immediate" "1,*")
10410 (set_attr "prefix" "vex")
10411 (set_attr "mode" "<sseinsnmode>")])
;; Register form of vcvtph2ps producing a V4SF result from the V8HI
;; register operand 1.  The conversion is modelled as a V8SF unspec
;; (UNSPEC_VCVTPH2PS family) of which four elements are selected.
;; The selection must name elements 0, 1, 2 and 3, each exactly once:
;; the previous index list (0 1 1 2) duplicated element 1 and dropped
;; element 3, so the RTL did not describe what the instruction computes.
10413 (define_insn "vcvtph2ps"
10414 [(set (match_operand:V4SF 0 "register_operand" "=x")
10416 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
10418 (parallel [(const_int 0) (const_int 1)
10419 (const_int 2) (const_int 3)])))]
10421 "vcvtph2ps\t{%1, %0|%0, %1}"
10422 [(set_attr "type" "ssecvt")
10423 (set_attr "prefix" "vex")
10424 (set_attr "mode" "V4SF")])
;; Memory-source form of vcvtph2ps: the V4SF register operand 0 is set
;; from UNSPEC_VCVTPH2PS applied to the V4HI memory operand 1.
;; NOTE(review): the mode attribute here is V8SF although the destination
;; is V4SF and the register-source "vcvtph2ps" pattern uses V4SF --
;; confirm whether this is intended.
10426 (define_insn "*vcvtph2ps_load"
10427 [(set (match_operand:V4SF 0 "register_operand" "=x")
10428 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
10429 UNSPEC_VCVTPH2PS))]
10431 "vcvtph2ps\t{%1, %0|%0, %1}"
10432 [(set_attr "type" "ssecvt")
10433 (set_attr "prefix" "vex")
10434 (set_attr "mode" "V8SF")])
;; 256-bit form of vcvtph2ps: the V8SF register operand 0 is set from
;; UNSPEC_VCVTPH2PS applied to the V8HI operand 1, which may be a
;; register or memory.
10436 (define_insn "vcvtph2ps256"
10437 [(set (match_operand:V8SF 0 "register_operand" "=x")
10438 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
10439 UNSPEC_VCVTPH2PS))]
10441 "vcvtph2ps\t{%1, %0|%0, %1}"
10442 [(set_attr "type" "ssecvt")
10443 (set_attr "prefix" "vex")
10444 (set_attr "mode" "V8SF")])
;; Named expander for vcvtps2ph with a V8HI register destination.  The
;; V4HI unspec takes the V4SF register operand 1 and the SImode immediate
;; operand 2.  The preparation statement sets operands[3] to the V4HImode
;; zero constant.
;; NOTE(review): operand 3 presumably fills the other half of the V8HI
;; result, as in the "*vcvtps2ph" insn -- confirm against the full template.
10446 (define_expand "vcvtps2ph"
10447 [(set (match_operand:V8HI 0 "register_operand" "")
10449 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
10450 (match_operand:SI 2 "immediate_operand" "")]
10454 "operands[3] = CONST0_RTX (V4HImode);")
;; Register-destination form of vcvtps2ph: the V8HI register operand 0 is
;; built from a V4HI unspec of the V4SF register operand 1 and the SImode
;; immediate operand 2 (constraint "N"), together with operand 3, which
;; must match const0_operand in V4HImode.
10456 (define_insn "*vcvtps2ph"
10457 [(set (match_operand:V8HI 0 "register_operand" "=x")
10459 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
10460 (match_operand:SI 2 "immediate_operand" "N")]
10462 (match_operand:V4HI 3 "const0_operand" "")))]
10464 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
10465 [(set_attr "type" "ssecvt")
10466 (set_attr "prefix" "vex")
10467 (set_attr "mode" "V4SF")])
;; Memory-destination form of vcvtps2ph: the V4HI memory operand 0 is set
;; from UNSPEC_VCVTPS2PH applied to the V4SF register operand 1 and the
;; SImode immediate operand 2 (constraint "N").
10469 (define_insn "*vcvtps2ph_store"
10470 [(set (match_operand:V4HI 0 "memory_operand" "=m")
10471 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
10472 (match_operand:SI 2 "immediate_operand" "N")]
10473 UNSPEC_VCVTPS2PH))]
10475 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
10476 [(set_attr "type" "ssecvt")
10477 (set_attr "prefix" "vex")
10478 (set_attr "mode" "V4SF")])
;; 256-bit-source form of vcvtps2ph: the V8HI operand 0 (register or
;; memory) is set from UNSPEC_VCVTPS2PH applied to the V8SF register
;; operand 1 and the SImode immediate operand 2 (constraint "N").
10480 (define_insn "vcvtps2ph256"
10481 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
10482 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
10483 (match_operand:SI 2 "immediate_operand" "N")]
10484 UNSPEC_VCVTPS2PH))]
10486 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
10487 [(set_attr "type" "ssecvt")
10488 (set_attr "prefix" "vex")
10489 (set_attr "mode" "V8SF")])