tree-data-ref.c (dr_analyze_alias): Handle case smt is NULL.
[gcc.git] / gcc / config / i386 / sse.md
1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007
3 ;; Free Software Foundation, Inc.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
21
22
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
;; Iterated over by the "mov<mode>" expander and "*mov<mode>_internal" below.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
26
27 ;; All 16-byte vector modes handled by SSE
;; (the four integer modes plus V4SF and V2DF).  Iterated over by the
;; "push<mode>1" and "movmisalign<mode>" expanders below.
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
29
30 ;; Mix-n-match
;; Subsets of the integer vector modes.  The digits in each macro name
;; match the element size in bytes of the listed modes
;; (1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI).
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
36
37 ;; Mapping from integer vector mode to mnemonic suffix
;; (byte, word, doubleword, quadword elements respectively).
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
39
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
41
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
43 ;;
44 ;; Move patterns
45 ;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
47
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
50
;; Move expander for each integer vector mode in SSEMODEI.  The C body
;; hands both operands to ix86_expand_vector_move and finishes with DONE,
;; so that helper is responsible for emitting the actual insns.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
54 "TARGET_SSE"
55 {
56 ix86_expand_vector_move (<MODE>mode, operands);
57 DONE;
58 })
59
;; Move insn for the integer vector modes in SSEMODEI.
;; The condition requires at least one operand to be a register.
;; Alternatives:
;;   0: constant matching "C" into an xmm register; the opcode is chosen
;;      by standard_sse_constant_opcode.
;;   1: xmm register or memory into an xmm register.
;;   2: xmm register into memory.
;; For alternatives 1 and 2 the "mode" attribute picks the mnemonic:
;; movaps when it is V4SF, movdqa otherwise.  The attribute is V4SF when
;; optimizing for size, when SSE2 is unavailable, or for the store
;; alternative when TARGET_SSE_TYPELESS_STORES is set; otherwise it is TI.
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
63 "TARGET_SSE
64 && (register_operand (operands[0], <MODE>mode)
65 || register_operand (operands[1], <MODE>mode))"
66 {
67 switch (which_alternative)
68 {
69 case 0:
70 return standard_sse_constant_opcode (insn, operands[1]);
71 case 1:
72 case 2:
73 if (get_attr_mode (insn) == MODE_V4SF)
74 return "movaps\t{%1, %0|%0, %1}";
75 else
76 return "movdqa\t{%1, %0|%0, %1}";
77 default:
78 gcc_unreachable ();
79 }
80 }
81 [(set_attr "type" "sselog1,ssemov,ssemov")
82 (set (attr "mode")
83 (if_then_else
84 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
85 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
86 (and (eq_attr "alternative" "2")
87 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
88 (const_int 0))))
89 (const_string "V4SF")
90 (const_string "TI")))])
91
92 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
93 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
94 ;; from memory, we'd prefer to load the memory directly into the %xmm
95 ;; register. To facilitate this happy circumstance, this pattern won't
96 ;; split until after register allocation. If the 64-bit value didn't
97 ;; come from memory, this is the best we can do. This is much better
98 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
99 ;; from there.
100
;; The insn is emitted as "#" and split once reload has completed.
;; Operand 1 is either an integer register pair (alternative 0, which
;; also needs the early-clobbered xmm scratch in operand 2) or memory
;; (alternative 1).
;;
;; The splitter inspects operand 1 directly instead of reading
;; which_alternative: that variable is only meaningful after the insn's
;; constraints have been extracted, which is not guaranteed at the point
;; where the split code runs.
(define_insn_and_split "movdi_to_sse"
  [(parallel
    [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
          (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
     (clobber (match_scratch:V4SI 2 "=&x,X"))])]
  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[1], DImode))
    {
      /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
         Assemble the 64-bit DImode value in an xmm register: load the low
         word into operand 0, the high word into the scratch, then
         interleave the two.  */
      emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
                                  gen_rtx_SUBREG (SImode, operands[1], 0)));
      emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
                                  gen_rtx_SUBREG (SImode, operands[1], 4)));
      emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
    }
  else if (memory_operand (operands[1], DImode))
    /* Load the 64-bit value straight from memory and zero the upper half.
       vec_concatv2di is defined outside this excerpt; going by its name it
       produces a V2DI value, so the V4SI destination is passed as its
       V2DI lowpart.  */
    emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
                                   operands[1], const0_rtx));
  else
    gcc_unreachable ();
  DONE;
})
132
;; Move expander for V4SF.  Delegates to ix86_expand_vector_move and
;; finishes with DONE.
133 (define_expand "movv4sf"
134 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
135 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
136 "TARGET_SSE"
137 {
138 ix86_expand_vector_move (V4SFmode, operands);
139 DONE;
140 })
141
;; Move insn for V4SF.  The condition requires at least one operand to
;; be a register.
;; Alternatives:
;;   0: constant matching "C" into an xmm register; the opcode is chosen
;;      by standard_sse_constant_opcode.
;;   1: xmm register or memory into an xmm register (movaps).
;;   2: xmm register into memory (movaps).
;; The default case uses gcc_unreachable (), as the sibling
;; "*mov<mode>_internal" and "*movv2df_internal" patterns do.
(define_insn "*movv4sf_internal"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
        (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
  "TARGET_SSE
   && (register_operand (operands[0], V4SFmode)
       || register_operand (operands[1], V4SFmode))"
{
  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);
    case 1:
    case 2:
      return "movaps\t{%1, %0|%0, %1}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set_attr "mode" "V4SF")])
162
;; After reload, rewrite a V4SF register set from a
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to its SFmode
;; subreg at byte 0 and operand 2 becomes the V4SF zero constant; the new
;; pattern merges the duplicated scalar into element 0 (mask 1) of the
;; zero vector.
163 (define_split
164 [(set (match_operand:V4SF 0 "register_operand" "")
165 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
166 "TARGET_SSE && reload_completed"
167 [(set (match_dup 0)
168 (vec_merge:V4SF
169 (vec_duplicate:V4SF (match_dup 1))
170 (match_dup 2)
171 (const_int 1)))]
172 {
173 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
174 operands[2] = CONST0_RTX (V4SFmode);
175 })
176
;; Move expander for V2DF.  Delegates to ix86_expand_vector_move and
;; finishes with DONE.  Enabled for plain TARGET_SSE (see the note at the
;; top of the move patterns about calling conventions).
177 (define_expand "movv2df"
178 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
179 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
180 "TARGET_SSE"
181 {
182 ix86_expand_vector_move (V2DFmode, operands);
183 DONE;
184 })
185
;; Move insn for V2DF.  The condition requires at least one operand to
;; be a register.
;; Alternatives:
;;   0: constant matching "C" into an xmm register; the opcode is chosen
;;      by standard_sse_constant_opcode.
;;   1: xmm register or memory into an xmm register.
;;   2: xmm register into memory.
;; For alternatives 1 and 2, movaps is emitted when the "mode" attribute
;; is V4SF and movapd otherwise.  The attribute is V4SF when optimizing
;; for size, when SSE2 is unavailable, or for the store alternative when
;; TARGET_SSE_TYPELESS_STORES is set; otherwise it is V2DF.
186 (define_insn "*movv2df_internal"
187 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
188 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
189 "TARGET_SSE
190 && (register_operand (operands[0], V2DFmode)
191 || register_operand (operands[1], V2DFmode))"
192 {
193 switch (which_alternative)
194 {
195 case 0:
196 return standard_sse_constant_opcode (insn, operands[1]);
197 case 1:
198 case 2:
199 if (get_attr_mode (insn) == MODE_V4SF)
200 return "movaps\t{%1, %0|%0, %1}";
201 else
202 return "movapd\t{%1, %0|%0, %1}";
203 default:
204 gcc_unreachable ();
205 }
206 }
207 [(set_attr "type" "sselog1,ssemov,ssemov")
208 (set (attr "mode")
209 (if_then_else
210 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
211 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
212 (and (eq_attr "alternative" "2")
213 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
214 (const_int 0))))
215 (const_string "V4SF")
216 (const_string "V2DF")))])
217
;; After reload (SSE2 only), rewrite a V2DF register set from a
;; zero_extended_scalar_load_operand as the concatenation of the DFmode
;; subreg at byte 0 of operand 1 with a DFmode zero.
218 (define_split
219 [(set (match_operand:V2DF 0 "register_operand" "")
220 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
221 "TARGET_SSE2 && reload_completed"
222 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
223 {
224 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
225 operands[2] = CONST0_RTX (DFmode);
226 })
227
;; Push expander for every mode in SSEMODE.  The register operand is
;; handed to ix86_expand_push, which emits the code; DONE suppresses the
;; (empty) template.
228 (define_expand "push<mode>1"
229 [(match_operand:SSEMODE 0 "register_operand" "")]
230 "TARGET_SSE"
231 {
232 ix86_expand_push (<MODE>mode, operands[0]);
233 DONE;
234 })
235
;; Misaligned-move expander for every mode in SSEMODE.  Delegates to
;; ix86_expand_vector_move_misalign and finishes with DONE.
236 (define_expand "movmisalign<mode>"
237 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
238 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
239 "TARGET_SSE"
240 {
241 ix86_expand_vector_move_misalign (<MODE>mode, operands);
242 DONE;
243 })
244
;; V4SF move through UNSPEC_MOVU, emitted as movups.
;; Alternatives: xmm/memory into an xmm register, or xmm register into
;; memory; the condition rejects memory-to-memory.
;; The "mode" attribute is V4SF, matching the operand mode and the
;; packed-single mnemonic (it previously read V2DF, which describes the
;; movupd pattern that follows, not this one).
(define_insn "sse_movups"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
        (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
                     UNSPEC_MOVU))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movups\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF")])
253
;; V2DF move through UNSPEC_MOVU, emitted as movupd (SSE2).
;; Alternatives: xmm/memory into an xmm register, or xmm register into
;; memory; the condition rejects memory-to-memory.
254 (define_insn "sse2_movupd"
255 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
256 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
257 UNSPEC_MOVU))]
258 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
259 "movupd\t{%1, %0|%0, %1}"
260 [(set_attr "type" "ssemov")
261 (set_attr "mode" "V2DF")])
262
;; V16QI move through UNSPEC_MOVU, emitted as movdqu (SSE2).
;; Alternatives: xmm/memory into an xmm register, or xmm register into
;; memory; the condition rejects memory-to-memory.
263 (define_insn "sse2_movdqu"
264 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
265 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
266 UNSPEC_MOVU))]
267 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
268 "movdqu\t{%1, %0|%0, %1}"
269 [(set_attr "type" "ssemov")
270 (set_attr "prefix_data16" "1")
271 (set_attr "mode" "TI")])
272
;; Non-temporal store of a V4SF xmm register to memory (movntps),
;; represented with UNSPEC_MOVNT.  Matches the "storentv4sf" expander.
273 (define_insn "sse_movntv4sf"
274 [(set (match_operand:V4SF 0 "memory_operand" "=m")
275 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
276 UNSPEC_MOVNT))]
277 "TARGET_SSE"
278 "movntps\t{%1, %0|%0, %1}"
279 [(set_attr "type" "ssemov")
280 (set_attr "mode" "V4SF")])
281
;; Non-temporal store of a V2DF xmm register to memory (movntpd, SSE2),
;; represented with UNSPEC_MOVNT.  Matches the "storentv2df" expander.
282 (define_insn "sse2_movntv2df"
283 [(set (match_operand:V2DF 0 "memory_operand" "=m")
284 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
285 UNSPEC_MOVNT))]
286 "TARGET_SSE2"
287 "movntpd\t{%1, %0|%0, %1}"
288 [(set_attr "type" "ssecvt")
289 (set_attr "mode" "V2DF")])
290
;; Non-temporal store of a V2DI xmm register to memory (movntdq, SSE2),
;; represented with UNSPEC_MOVNT.  Matches the "storentv2di" expander.
291 (define_insn "sse2_movntv2di"
292 [(set (match_operand:V2DI 0 "memory_operand" "=m")
293 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
294 UNSPEC_MOVNT))]
295 "TARGET_SSE2"
296 "movntdq\t{%1, %0|%0, %1}"
297 [(set_attr "type" "ssecvt")
298 (set_attr "prefix_data16" "1")
299 (set_attr "mode" "TI")])
300
;; Non-temporal store of an SImode general register ("r") to memory
;; (movnti, SSE2), represented with UNSPEC_MOVNT.  Matches the
;; "storentsi" expander.
;; The "mode" attribute is SI because both operands are SImode integers;
;; it previously read V2DF, which does not describe this insn.
(define_insn "sse2_movntsi"
  [(set (match_operand:SI 0 "memory_operand" "=m")
        (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movnti\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "SI")])
309
;; V16QI load from memory into an xmm register using lddqu (SSE3),
;; represented with UNSPEC_LDDQU.  The source must be a memory operand.
310 (define_insn "sse3_lddqu"
311 [(set (match_operand:V16QI 0 "register_operand" "=x")
312 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
313 UNSPEC_LDDQU))]
314 "TARGET_SSE3"
315 "lddqu\t{%1, %0|%0, %1}"
316 [(set_attr "type" "ssecvt")
317 (set_attr "prefix_rep" "1")
318 (set_attr "mode" "TI")])
319
320 ; Expand patterns for non-temporal stores. At the moment, only those
321 ; that directly map to insns are defined; it would be possible to
322 ; define patterns for other modes that would expand to several insns.
323
;; Named non-temporal store expander for V4SF.  The empty C body means
;; the template is emitted as-is; it has the same shape as "sse_movntv4sf".
324 (define_expand "storentv4sf"
325 [(set (match_operand:V4SF 0 "memory_operand" "=m")
326 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
327 UNSPEC_MOVNT))]
328 "TARGET_SSE"
329 "")
330
;; Named non-temporal store expander for V2DF (SSE2).  The template is
;; emitted as-is; it has the same shape as "sse2_movntv2df".
331 (define_expand "storentv2df"
332 [(set (match_operand:V2DF 0 "memory_operand" "=m")
333 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
334 UNSPEC_MOVNT))]
335 "TARGET_SSE2"
336 "")
337
;; Named non-temporal store expander for V2DI (SSE2).  The template is
;; emitted as-is; it has the same shape as "sse2_movntv2di".
338 (define_expand "storentv2di"
339 [(set (match_operand:V2DI 0 "memory_operand" "=m")
340 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
341 UNSPEC_MOVNT))]
342 "TARGET_SSE2"
343 "")
344
;; Named non-temporal store expander for SImode (SSE2).  The template is
;; emitted as-is; it has the same shape as "sse2_movntsi".
345 (define_expand "storentsi"
346 [(set (match_operand:SI 0 "memory_operand" "=m")
347 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
348 UNSPEC_MOVNT))]
349 "TARGET_SSE2"
350 "")
351
352 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
353 ;;
354 ;; Parallel single-precision floating point arithmetic
355 ;;
356 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
357
;; V4SF negation.  Expansion is delegated to
;; ix86_expand_fp_absneg_operator with code NEG.
358 (define_expand "negv4sf2"
359 [(set (match_operand:V4SF 0 "register_operand" "")
360 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
361 "TARGET_SSE"
362 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
363
;; V4SF absolute value.  Expansion is delegated to
;; ix86_expand_fp_absneg_operator with code ABS.
364 (define_expand "absv4sf2"
365 [(set (match_operand:V4SF 0 "register_operand" "")
366 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
367 "TARGET_SSE"
368 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
369
;; V4SF addition expander.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy; there is no DONE, so the template
;; is then emitted and matched by "*addv4sf3".
370 (define_expand "addv4sf3"
371 [(set (match_operand:V4SF 0 "register_operand" "")
372 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
373 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
374 "TARGET_SSE"
375 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
376
;; Packed single-precision add (addps).  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be an xmm
;; register or memory.
377 (define_insn "*addv4sf3"
378 [(set (match_operand:V4SF 0 "register_operand" "=x")
379 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
380 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
381 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
382 "addps\t{%2, %0|%0, %2}"
383 [(set_attr "type" "sseadd")
384 (set_attr "mode" "V4SF")])
385
;; Scalar single-precision add on vector registers (addss).
;; vec_merge with mask 1: element 0 comes from the sum, the remaining
;; elements are copied from operand 1 (tied to the destination).
386 (define_insn "sse_vmaddv4sf3"
387 [(set (match_operand:V4SF 0 "register_operand" "=x")
388 (vec_merge:V4SF
389 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
390 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
391 (match_dup 1)
392 (const_int 1)))]
393 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
394 "addss\t{%2, %0|%0, %2}"
395 [(set_attr "type" "sseadd")
396 (set_attr "mode" "SF")])
397
;; V4SF subtraction expander.  Operand 1 must already be a register
;; (subtraction is not commutative); operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the template is then emitted.
398 (define_expand "subv4sf3"
399 [(set (match_operand:V4SF 0 "register_operand" "")
400 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
401 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
402 "TARGET_SSE"
403 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
404
;; Packed single-precision subtract (subps).  Operand 1 is tied to the
;; destination; operand 2 may be an xmm register or memory.
405 (define_insn "*subv4sf3"
406 [(set (match_operand:V4SF 0 "register_operand" "=x")
407 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
408 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
409 "TARGET_SSE"
410 "subps\t{%2, %0|%0, %2}"
411 [(set_attr "type" "sseadd")
412 (set_attr "mode" "V4SF")])
413
;; Scalar single-precision subtract on vector registers (subss).
;; vec_merge with mask 1: element 0 comes from the difference, the
;; remaining elements are copied from operand 1 (tied to the destination).
414 (define_insn "sse_vmsubv4sf3"
415 [(set (match_operand:V4SF 0 "register_operand" "=x")
416 (vec_merge:V4SF
417 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
418 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
419 (match_dup 1)
420 (const_int 1)))]
421 "TARGET_SSE"
422 "subss\t{%2, %0|%0, %2}"
423 [(set_attr "type" "sseadd")
424 (set_attr "mode" "SF")])
425
;; V4SF multiplication expander.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy; the template is then emitted and
;; matched by "*mulv4sf3".
426 (define_expand "mulv4sf3"
427 [(set (match_operand:V4SF 0 "register_operand" "")
428 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
429 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
430 "TARGET_SSE"
431 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
432
;; Packed single-precision multiply (mulps).  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be an xmm
;; register or memory.
433 (define_insn "*mulv4sf3"
434 [(set (match_operand:V4SF 0 "register_operand" "=x")
435 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
436 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
437 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
438 "mulps\t{%2, %0|%0, %2}"
439 [(set_attr "type" "ssemul")
440 (set_attr "mode" "V4SF")])
441
;; Scalar single-precision multiply on vector registers (mulss).
;; vec_merge with mask 1: element 0 comes from the product, the remaining
;; elements are copied from operand 1 (tied to the destination).
442 (define_insn "sse_vmmulv4sf3"
443 [(set (match_operand:V4SF 0 "register_operand" "=x")
444 (vec_merge:V4SF
445 (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
446 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
447 (match_dup 1)
448 (const_int 1)))]
449 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
450 "mulss\t{%2, %0|%0, %2}"
451 [(set_attr "type" "ssemul")
452 (set_attr "mode" "SF")])
453
;; V4SF division expander.  Operand 1 must already be a register
;; (division is not commutative); operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the template is then emitted.
454 (define_expand "divv4sf3"
455 [(set (match_operand:V4SF 0 "register_operand" "")
456 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
457 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
458 "TARGET_SSE"
459 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
460
;; Packed single-precision divide (divps).  Operand 1 is tied to the
;; destination; operand 2 may be an xmm register or memory.
461 (define_insn "*divv4sf3"
462 [(set (match_operand:V4SF 0 "register_operand" "=x")
463 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
464 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
465 "TARGET_SSE"
466 "divps\t{%2, %0|%0, %2}"
467 [(set_attr "type" "ssediv")
468 (set_attr "mode" "V4SF")])
469
;; Scalar single-precision divide on vector registers (divss).
;; vec_merge with mask 1: element 0 comes from the quotient, the remaining
;; elements are copied from operand 1 (tied to the destination).
470 (define_insn "sse_vmdivv4sf3"
471 [(set (match_operand:V4SF 0 "register_operand" "=x")
472 (vec_merge:V4SF
473 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
474 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
475 (match_dup 1)
476 (const_int 1)))]
477 "TARGET_SSE"
478 "divss\t{%2, %0|%0, %2}"
479 [(set_attr "type" "ssediv")
480 (set_attr "mode" "SF")])
481
;; Packed single-precision reciprocal (rcpps), represented with
;; UNSPEC_RCP.  The source may be an xmm register or memory and is not
;; tied to the destination.
482 (define_insn "sse_rcpv4sf2"
483 [(set (match_operand:V4SF 0 "register_operand" "=x")
484 (unspec:V4SF
485 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
486 "TARGET_SSE"
487 "rcpps\t{%1, %0|%0, %1}"
488 [(set_attr "type" "sse")
489 (set_attr "mode" "V4SF")])
490
;; Scalar single-precision reciprocal (rcpss).
;; vec_merge with mask 1: element 0 comes from the UNSPEC_RCP of
;; operand 1, the remaining elements from operand 2, which is tied to
;; the destination.
491 (define_insn "sse_vmrcpv4sf2"
492 [(set (match_operand:V4SF 0 "register_operand" "=x")
493 (vec_merge:V4SF
494 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
495 UNSPEC_RCP)
496 (match_operand:V4SF 2 "register_operand" "0")
497 (const_int 1)))]
498 "TARGET_SSE"
499 "rcpss\t{%1, %0|%0, %1}"
500 [(set_attr "type" "sse")
501 (set_attr "mode" "SF")])
502
;; Packed single-precision reciprocal square root (rsqrtps), represented
;; with UNSPEC_RSQRT.  The source may be an xmm register or memory.
503 (define_insn "sse_rsqrtv4sf2"
504 [(set (match_operand:V4SF 0 "register_operand" "=x")
505 (unspec:V4SF
506 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
507 "TARGET_SSE"
508 "rsqrtps\t{%1, %0|%0, %1}"
509 [(set_attr "type" "sse")
510 (set_attr "mode" "V4SF")])
511
;; Scalar single-precision reciprocal square root (rsqrtss).
;; vec_merge with mask 1: element 0 comes from the UNSPEC_RSQRT of
;; operand 1, the remaining elements from operand 2, which is tied to
;; the destination.
512 (define_insn "sse_vmrsqrtv4sf2"
513 [(set (match_operand:V4SF 0 "register_operand" "=x")
514 (vec_merge:V4SF
515 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
516 UNSPEC_RSQRT)
517 (match_operand:V4SF 2 "register_operand" "0")
518 (const_int 1)))]
519 "TARGET_SSE"
520 "rsqrtss\t{%1, %0|%0, %1}"
521 [(set_attr "type" "sse")
522 (set_attr "mode" "SF")])
523
;; Packed single-precision square root (sqrtps).  The source may be an
;; xmm register or memory and is not tied to the destination.
524 (define_insn "sqrtv4sf2"
525 [(set (match_operand:V4SF 0 "register_operand" "=x")
526 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
527 "TARGET_SSE"
528 "sqrtps\t{%1, %0|%0, %1}"
529 [(set_attr "type" "sse")
530 (set_attr "mode" "V4SF")])
531
;; Scalar single-precision square root (sqrtss).
;; vec_merge with mask 1: element 0 comes from the sqrt of operand 1, the
;; remaining elements from operand 2, which is tied to the destination.
532 (define_insn "sse_vmsqrtv4sf2"
533 [(set (match_operand:V4SF 0 "register_operand" "=x")
534 (vec_merge:V4SF
535 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
536 (match_operand:V4SF 2 "register_operand" "0")
537 (const_int 1)))]
538 "TARGET_SSE"
539 "sqrtss\t{%1, %0|%0, %1}"
540 [(set_attr "type" "sse")
541 (set_attr "mode" "SF")])
542
543 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
544 ;; isn't really correct, as those rtl operators aren't defined when
545 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
546
;; V4SF signed-maximum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register, which is what the
;; non-commutative "*smaxv4sf3" insn requires.  The operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy and the template is
;; emitted.
547 (define_expand "smaxv4sf3"
548 [(set (match_operand:V4SF 0 "register_operand" "")
549 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
550 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
551 "TARGET_SSE"
552 {
553 if (!flag_finite_math_only)
554 operands[1] = force_reg (V4SFmode, operands[1]);
555 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
556 })
557
;; maxps variant that is only enabled under flag_finite_math_only.
;; Here the operands are treated as commutative ("%0" on operand 1).
558 (define_insn "*smaxv4sf3_finite"
559 [(set (match_operand:V4SF 0 "register_operand" "=x")
560 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
561 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
562 "TARGET_SSE && flag_finite_math_only
563 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
564 "maxps\t{%2, %0|%0, %2}"
565 [(set_attr "type" "sse")
566 (set_attr "mode" "V4SF")])
567
;; General maxps pattern.  Operand 1 must be a register tied to the
;; destination and is not marked commutative, so operand order is kept.
568 (define_insn "*smaxv4sf3"
569 [(set (match_operand:V4SF 0 "register_operand" "=x")
570 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
571 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
572 "TARGET_SSE"
573 "maxps\t{%2, %0|%0, %2}"
574 [(set_attr "type" "sse")
575 (set_attr "mode" "V4SF")])
576
;; Scalar single-precision maximum on vector registers (maxss).
;; vec_merge with mask 1: element 0 comes from the smax, the remaining
;; elements are copied from operand 1 (tied to the destination).
577 (define_insn "sse_vmsmaxv4sf3"
578 [(set (match_operand:V4SF 0 "register_operand" "=x")
579 (vec_merge:V4SF
580 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
581 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
582 (match_dup 1)
583 (const_int 1)))]
584 "TARGET_SSE"
585 "maxss\t{%2, %0|%0, %2}"
586 [(set_attr "type" "sse")
587 (set_attr "mode" "SF")])
588
;; V4SF signed-minimum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register, which is what the
;; non-commutative "*sminv4sf3" insn requires.  The operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy and the template is
;; emitted.
589 (define_expand "sminv4sf3"
590 [(set (match_operand:V4SF 0 "register_operand" "")
591 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
592 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
593 "TARGET_SSE"
594 {
595 if (!flag_finite_math_only)
596 operands[1] = force_reg (V4SFmode, operands[1]);
597 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
598 })
599
;; minps variant that is only enabled under flag_finite_math_only.
;; Here the operands are treated as commutative ("%0" on operand 1).
600 (define_insn "*sminv4sf3_finite"
601 [(set (match_operand:V4SF 0 "register_operand" "=x")
602 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
603 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
604 "TARGET_SSE && flag_finite_math_only
605 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
606 "minps\t{%2, %0|%0, %2}"
607 [(set_attr "type" "sse")
608 (set_attr "mode" "V4SF")])
609
;; General minps pattern.  Operand 1 must be a register tied to the
;; destination and is not marked commutative, so operand order is kept.
610 (define_insn "*sminv4sf3"
611 [(set (match_operand:V4SF 0 "register_operand" "=x")
612 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
613 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
614 "TARGET_SSE"
615 "minps\t{%2, %0|%0, %2}"
616 [(set_attr "type" "sse")
617 (set_attr "mode" "V4SF")])
618
;; Scalar single-precision minimum on vector registers (minss).
;; vec_merge with mask 1: element 0 comes from the smin, the remaining
;; elements are copied from operand 1 (tied to the destination).
619 (define_insn "sse_vmsminv4sf3"
620 [(set (match_operand:V4SF 0 "register_operand" "=x")
621 (vec_merge:V4SF
622 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
623 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
624 (match_dup 1)
625 (const_int 1)))]
626 "TARGET_SSE"
627 "minss\t{%2, %0|%0, %2}"
628 [(set_attr "type" "sse")
629 (set_attr "mode" "SF")])
630
631 ;; These versions of the min/max patterns implement exactly the operations
632 ;; min = (op1 < op2 ? op1 : op2)
633 ;; max = (!(op1 < op2) ? op1 : op2)
634 ;; Their operands are not commutative, and thus they may be used in the
635 ;; presence of -0.0 and NaN.
636
;; Order-preserving V4SF minimum (minps) expressed as UNSPEC_IEEE_MIN so
;; that the operands are never swapped.  Operand 1 is tied to the
;; destination.
637 (define_insn "*ieee_sminv4sf3"
638 [(set (match_operand:V4SF 0 "register_operand" "=x")
639 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
640 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
641 UNSPEC_IEEE_MIN))]
642 "TARGET_SSE"
643 "minps\t{%2, %0|%0, %2}"
644 [(set_attr "type" "sseadd")
645 (set_attr "mode" "V4SF")])
646
;; Order-preserving V4SF maximum (maxps) expressed as UNSPEC_IEEE_MAX so
;; that the operands are never swapped.  Operand 1 is tied to the
;; destination.
647 (define_insn "*ieee_smaxv4sf3"
648 [(set (match_operand:V4SF 0 "register_operand" "=x")
649 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
650 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
651 UNSPEC_IEEE_MAX))]
652 "TARGET_SSE"
653 "maxps\t{%2, %0|%0, %2}"
654 [(set_attr "type" "sseadd")
655 (set_attr "mode" "V4SF")])
656
;; Order-preserving V2DF minimum (minpd, SSE2) expressed as
;; UNSPEC_IEEE_MIN.  Operand 1 is tied to the destination.
657 (define_insn "*ieee_sminv2df3"
658 [(set (match_operand:V2DF 0 "register_operand" "=x")
659 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
660 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
661 UNSPEC_IEEE_MIN))]
662 "TARGET_SSE2"
663 "minpd\t{%2, %0|%0, %2}"
664 [(set_attr "type" "sseadd")
665 (set_attr "mode" "V2DF")])
666
;; Order-preserving V2DF maximum (maxpd, SSE2) expressed as
;; UNSPEC_IEEE_MAX.  Operand 1 is tied to the destination.
667 (define_insn "*ieee_smaxv2df3"
668 [(set (match_operand:V2DF 0 "register_operand" "=x")
669 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
670 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
671 UNSPEC_IEEE_MAX))]
672 "TARGET_SSE2"
673 "maxpd\t{%2, %0|%0, %2}"
674 [(set_attr "type" "sseadd")
675 (set_attr "mode" "V2DF")])
676
;; SSE3 addsubps: subtract in the even elements (0 and 2) and add in the
;; odd elements (1 and 3).
;; In vec_merge, a set bit i in the mask selects element i from the first
;; arm.  The first arm is the PLUS, so the mask must select elements 1 and
;; 3, i.e. binary 1010 = 10.  The earlier mask of 5 selected elements 0
;; and 2 from the PLUS arm, describing the opposite of what the
;; instruction computes.
(define_insn "sse3_addsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (plus:V4SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (minus:V4SF (match_dup 1) (match_dup 2))
          (const_int 10)))]
  "TARGET_SSE3"
  "addsubps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "V4SF")])
690
;; SSE3 horizontal add (haddps).  Writing a = operand 1 (tied to the
;; destination) and b = operand 2, the result is
;;   { a[0]+a[1], a[2]+a[3], b[0]+b[1], b[2]+b[3] }.
691 (define_insn "sse3_haddv4sf3"
692 [(set (match_operand:V4SF 0 "register_operand" "=x")
693 (vec_concat:V4SF
694 (vec_concat:V2SF
695 (plus:SF
696 (vec_select:SF
697 (match_operand:V4SF 1 "register_operand" "0")
698 (parallel [(const_int 0)]))
699 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
700 (plus:SF
701 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
702 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
703 (vec_concat:V2SF
704 (plus:SF
705 (vec_select:SF
706 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
707 (parallel [(const_int 0)]))
708 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
709 (plus:SF
710 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
711 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
712 "TARGET_SSE3"
713 "haddps\t{%2, %0|%0, %2}"
714 [(set_attr "type" "sseadd")
715 (set_attr "prefix_rep" "1")
716 (set_attr "mode" "V4SF")])
717
;; SSE3 horizontal subtract (hsubps).  Writing a = operand 1 (tied to
;; the destination) and b = operand 2, the result is
;;   { a[0]-a[1], a[2]-a[3], b[0]-b[1], b[2]-b[3] }.
718 (define_insn "sse3_hsubv4sf3"
719 [(set (match_operand:V4SF 0 "register_operand" "=x")
720 (vec_concat:V4SF
721 (vec_concat:V2SF
722 (minus:SF
723 (vec_select:SF
724 (match_operand:V4SF 1 "register_operand" "0")
725 (parallel [(const_int 0)]))
726 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
727 (minus:SF
728 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
729 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
730 (vec_concat:V2SF
731 (minus:SF
732 (vec_select:SF
733 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
734 (parallel [(const_int 0)]))
735 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
736 (minus:SF
737 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
738 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
739 "TARGET_SSE3"
740 "hsubps\t{%2, %0|%0, %2}"
741 [(set_attr "type" "sseadd")
742 (set_attr "prefix_rep" "1")
743 (set_attr "mode" "V4SF")])
744
;; Sum reduction of a V4SF vector.  With SSE3 it is done with two
;; horizontal adds through a fresh temporary; otherwise it is delegated
;; to ix86_expand_reduc_v4sf using the addv4sf3 generator.
745 (define_expand "reduc_splus_v4sf"
746 [(match_operand:V4SF 0 "register_operand" "")
747 (match_operand:V4SF 1 "register_operand" "")]
748 "TARGET_SSE"
749 {
750 if (TARGET_SSE3)
751 {
752 rtx tmp = gen_reg_rtx (V4SFmode);
753 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
754 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
755 }
756 else
757 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
758 DONE;
759 })
760
;; Maximum reduction of a V4SF vector, delegated to
;; ix86_expand_reduc_v4sf using the smaxv4sf3 generator.
761 (define_expand "reduc_smax_v4sf"
762 [(match_operand:V4SF 0 "register_operand" "")
763 (match_operand:V4SF 1 "register_operand" "")]
764 "TARGET_SSE"
765 {
766 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
767 DONE;
768 })
769
;; Minimum reduction of a V4SF vector, delegated to
;; ix86_expand_reduc_v4sf using the sminv4sf3 generator.
770 (define_expand "reduc_smin_v4sf"
771 [(match_operand:V4SF 0 "register_operand" "")
772 (match_operand:V4SF 1 "register_operand" "")]
773 "TARGET_SSE"
774 {
775 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
776 DONE;
777 })
778
779 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
780 ;;
781 ;; Parallel single-precision floating point comparisons
782 ;;
783 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
784
;; Packed single-precision compare (cmp<cond>ps).  Operator 3 must
;; satisfy sse_comparison_operator; the %D3 output modifier supplies the
;; condition part of the mnemonic (its definition is outside this file
;; section).  Operand 1 is tied to the destination.
785 (define_insn "sse_maskcmpv4sf3"
786 [(set (match_operand:V4SF 0 "register_operand" "=x")
787 (match_operator:V4SF 3 "sse_comparison_operator"
788 [(match_operand:V4SF 1 "register_operand" "0")
789 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
790 "TARGET_SSE"
791 "cmp%D3ps\t{%2, %0|%0, %2}"
792 [(set_attr "type" "ssecmp")
793 (set_attr "mode" "V4SF")])
794
;; Scalar SFmode compare (cmp<cond>ss) on SF operands held in xmm
;; registers.  Operator 3 must satisfy sse_comparison_operator; operand 1
;; is tied to the destination.
795 (define_insn "sse_maskcmpsf3"
796 [(set (match_operand:SF 0 "register_operand" "=x")
797 (match_operator:SF 3 "sse_comparison_operator"
798 [(match_operand:SF 1 "register_operand" "0")
799 (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
800 "TARGET_SSE"
801 "cmp%D3ss\t{%2, %0|%0, %2}"
802 [(set_attr "type" "ssecmp")
803 (set_attr "mode" "SF")])
804
;; Scalar compare on vector registers (cmp<cond>ss).
;; vec_merge with mask 1: element 0 comes from the comparison, the
;; remaining elements are copied from operand 1 (tied to the
;; destination).  Operand 2 is restricted to an xmm register here.
805 (define_insn "sse_vmmaskcmpv4sf3"
806 [(set (match_operand:V4SF 0 "register_operand" "=x")
807 (vec_merge:V4SF
808 (match_operator:V4SF 3 "sse_comparison_operator"
809 [(match_operand:V4SF 1 "register_operand" "0")
810 (match_operand:V4SF 2 "register_operand" "x")])
811 (match_dup 1)
812 (const_int 1)))]
813 "TARGET_SSE"
814 "cmp%D3ss\t{%2, %0|%0, %2}"
815 [(set_attr "type" "ssecmp")
816 (set_attr "mode" "SF")])
817
;; comiss: compare element 0 of operand 0 with element 0 of operand 1
;; and set FLAGS_REG in CCFP mode.  Operand 1 may be a register or memory.
818 (define_insn "sse_comi"
819 [(set (reg:CCFP FLAGS_REG)
820 (compare:CCFP
821 (vec_select:SF
822 (match_operand:V4SF 0 "register_operand" "x")
823 (parallel [(const_int 0)]))
824 (vec_select:SF
825 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
826 (parallel [(const_int 0)]))))]
827 "TARGET_SSE"
828 "comiss\t{%1, %0|%0, %1}"
829 [(set_attr "type" "ssecomi")
830 (set_attr "mode" "SF")])
831
;; ucomiss: compare element 0 of operand 0 with element 0 of operand 1
;; and set FLAGS_REG in CCFPU mode.  Operand 1 may be a register or
;; memory.
832 (define_insn "sse_ucomi"
833 [(set (reg:CCFPU FLAGS_REG)
834 (compare:CCFPU
835 (vec_select:SF
836 (match_operand:V4SF 0 "register_operand" "x")
837 (parallel [(const_int 0)]))
838 (vec_select:SF
839 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
840 (parallel [(const_int 0)]))))]
841 "TARGET_SSE"
842 "ucomiss\t{%1, %0|%0, %1}"
843 [(set_attr "type" "ssecomi")
844 (set_attr "mode" "SF")])
845
;; Vector conditional select for V4SF: operand 0 gets operand 1 or
;; operand 2 depending on comparison operator 3 applied to operands 4
;; and 5.  All work is done by ix86_expand_fp_vcond; the expansion FAILs
;; when that helper returns zero.
846 (define_expand "vcondv4sf"
847 [(set (match_operand:V4SF 0 "register_operand" "")
848 (if_then_else:V4SF
849 (match_operator 3 ""
850 [(match_operand:V4SF 4 "nonimmediate_operand" "")
851 (match_operand:V4SF 5 "nonimmediate_operand" "")])
852 (match_operand:V4SF 1 "general_operand" "")
853 (match_operand:V4SF 2 "general_operand" "")))]
854 "TARGET_SSE"
855 {
856 if (ix86_expand_fp_vcond (operands))
857 DONE;
858 else
859 FAIL;
860 })
861
862 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
863 ;;
864 ;; Parallel single-precision floating point logical operations
865 ;;
866 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
867
;; Bitwise AND expander on V4SF.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy; the template is then emitted and
;; matched by "*andv4sf3".
868 (define_expand "andv4sf3"
869 [(set (match_operand:V4SF 0 "register_operand" "")
870 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
871 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
872 "TARGET_SSE"
873 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
874
;; Bitwise AND of two V4SF values (andps).  Operand 1 is tied to the
;; destination and marked commutative ("%0").
875 (define_insn "*andv4sf3"
876 [(set (match_operand:V4SF 0 "register_operand" "=x")
877 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
878 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
879 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
880 "andps\t{%2, %0|%0, %2}"
881 [(set_attr "type" "sselog")
882 (set_attr "mode" "V4SF")])
883
;; AND-NOT on V4SF (andnps): (~operand 1) & operand 2.  The complemented
;; operand is the one tied to the destination.
884 (define_insn "sse_nandv4sf3"
885 [(set (match_operand:V4SF 0 "register_operand" "=x")
886 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
887 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
888 "TARGET_SSE"
889 "andnps\t{%2, %0|%0, %2}"
890 [(set_attr "type" "sselog")
891 (set_attr "mode" "V4SF")])
892
;; Bitwise inclusive-OR expander on V4SF.  Operands are adjusted in
;; place by ix86_fixup_binary_operands_no_copy; the template is then
;; emitted and matched by "*iorv4sf3".
893 (define_expand "iorv4sf3"
894 [(set (match_operand:V4SF 0 "register_operand" "")
895 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
896 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
897 "TARGET_SSE"
898 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
899
;; Bitwise inclusive OR of two V4SF values (orps).  Operand 1 is tied
;; to the destination and marked commutative ("%0").
900 (define_insn "*iorv4sf3"
901 [(set (match_operand:V4SF 0 "register_operand" "=x")
902 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
903 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
904 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
905 "orps\t{%2, %0|%0, %2}"
906 [(set_attr "type" "sselog")
907 (set_attr "mode" "V4SF")])
908
;; Bitwise exclusive-OR expander on V4SF.  Operands are adjusted in
;; place by ix86_fixup_binary_operands_no_copy; the template is then
;; emitted and matched by "*xorv4sf3".
909 (define_expand "xorv4sf3"
910 [(set (match_operand:V4SF 0 "register_operand" "")
911 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
912 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
913 "TARGET_SSE"
914 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
915
;; Bitwise exclusive OR of two V4SF values (xorps).  Operand 1 is tied
;; to the destination and marked commutative ("%0").
916 (define_insn "*xorv4sf3"
917 [(set (match_operand:V4SF 0 "register_operand" "=x")
918 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
919 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
920 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
921 "xorps\t{%2, %0|%0, %2}"
922 [(set_attr "type" "sselog")
923 (set_attr "mode" "V4SF")])
924
925 ;; Also define scalar versions. These are used for abs, neg, and
926 ;; conditional move. Using subregs into vector modes causes register
927 ;; allocation lossage. These patterns do not allow memory operands
928 ;; because the native instructions read the full 128-bits.
929
;; Scalar SF bitwise AND, emitted as the packed ANDPS instruction.
;; Both inputs must be SSE registers; operand 1 is tied to the destination.
930 (define_insn "*andsf3"
931 [(set (match_operand:SF 0 "register_operand" "=x")
932 (and:SF (match_operand:SF 1 "register_operand" "0")
933 (match_operand:SF 2 "register_operand" "x")))]
934 "TARGET_SSE"
935 "andps\t{%2, %0|%0, %2}"
936 [(set_attr "type" "sselog")
937 (set_attr "mode" "V4SF")])
938
;; Scalar SF and-not: operand 0 = (NOT operand 1) AND operand 2, emitted as
;; the packed ANDNPS instruction.  Register operands only; operand 1 is
;; tied to the destination.
939 (define_insn "*nandsf3"
940 [(set (match_operand:SF 0 "register_operand" "=x")
941 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
942 (match_operand:SF 2 "register_operand" "x")))]
943 "TARGET_SSE"
944 "andnps\t{%2, %0|%0, %2}"
945 [(set_attr "type" "sselog")
946 (set_attr "mode" "V4SF")])
947
;; Scalar SF inclusive OR, emitted as the packed ORPS instruction.
;; Both inputs must be SSE registers; operand 1 is tied to the destination.
948 (define_insn "*iorsf3"
949 [(set (match_operand:SF 0 "register_operand" "=x")
950 (ior:SF (match_operand:SF 1 "register_operand" "0")
951 (match_operand:SF 2 "register_operand" "x")))]
952 "TARGET_SSE"
953 "orps\t{%2, %0|%0, %2}"
954 [(set_attr "type" "sselog")
955 (set_attr "mode" "V4SF")])
956
;; Scalar SF exclusive OR, emitted as the packed XORPS instruction.
;; Both inputs must be SSE registers; operand 1 is tied to the destination.
957 (define_insn "*xorsf3"
958 [(set (match_operand:SF 0 "register_operand" "=x")
959 (xor:SF (match_operand:SF 1 "register_operand" "0")
960 (match_operand:SF 2 "register_operand" "x")))]
961 "TARGET_SSE"
962 "xorps\t{%2, %0|%0, %2}"
963 [(set_attr "type" "sselog")
964 (set_attr "mode" "V4SF")])
965
966 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
967 ;;
968 ;; Parallel single-precision floating point conversion operations
969 ;;
970 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
971
;; CVTPI2PS: convert the V2SI operand 2 ("ym") to V2SF and merge it into
;; elements 0 and 1 of operand 1 (vec_merge mask 3); the remaining elements
;; come from operand 1, which is tied to the destination.
972 (define_insn "sse_cvtpi2ps"
973 [(set (match_operand:V4SF 0 "register_operand" "=x")
974 (vec_merge:V4SF
975 (vec_duplicate:V4SF
976 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
977 (match_operand:V4SF 1 "register_operand" "0")
978 (const_int 3)))]
979 "TARGET_SSE"
980 "cvtpi2ps\t{%2, %0|%0, %2}"
981 [(set_attr "type" "ssecvt")
982 (set_attr "mode" "V4SF")])
983
;; CVTPS2PI: elements 0 and 1 of the V4SF -> V4SI conversion modelled by
;; UNSPEC_FIX_NOTRUNC, written to a V2SI destination with constraint "y".
984 (define_insn "sse_cvtps2pi"
985 [(set (match_operand:V2SI 0 "register_operand" "=y")
986 (vec_select:V2SI
987 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
988 UNSPEC_FIX_NOTRUNC)
989 (parallel [(const_int 0) (const_int 1)])))]
990 "TARGET_SSE"
991 "cvtps2pi\t{%1, %0|%0, %1}"
992 [(set_attr "type" "ssecvt")
993 (set_attr "unit" "mmx")
994 (set_attr "mode" "DI")])
995
;; CVTTPS2PI: elements 0 and 1 of the (fix ...) V4SF -> V4SI conversion,
;; written to a V2SI destination with constraint "y".
;; NOTE(review): the "mode" attribute is SF here but DI on sse_cvtps2pi;
;; confirm which value is intended.
996 (define_insn "sse_cvttps2pi"
997 [(set (match_operand:V2SI 0 "register_operand" "=y")
998 (vec_select:V2SI
999 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1000 (parallel [(const_int 0) (const_int 1)])))]
1001 "TARGET_SSE"
1002 "cvttps2pi\t{%1, %0|%0, %1}"
1003 [(set_attr "type" "ssecvt")
1004 (set_attr "unit" "mmx")
1005 (set_attr "mode" "SF")])
1006
;; CVTSI2SS: convert the SI operand 2 to SF and merge it into element 0 of
;; operand 1 (vec_merge mask 1).  Two alternatives: source in a general
;; register ("r") or in memory ("m"), each with its own decode attributes.
1007 (define_insn "sse_cvtsi2ss"
1008 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1009 (vec_merge:V4SF
1010 (vec_duplicate:V4SF
1011 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1012 (match_operand:V4SF 1 "register_operand" "0,0")
1013 (const_int 1)))]
1014 "TARGET_SSE"
1015 "cvtsi2ss\t{%2, %0|%0, %2}"
1016 [(set_attr "type" "sseicvt")
1017 (set_attr "athlon_decode" "vector,double")
1018 (set_attr "amdfam10_decode" "vector,double")
1019 (set_attr "mode" "SF")])
1020
;; CVTSI2SSQ: 64-bit variant of sse_cvtsi2ss, only available with
;; TARGET_64BIT.  Converts the DI operand 2 to SF and merges it into
;; element 0 of operand 1 (vec_merge mask 1).
1021 (define_insn "sse_cvtsi2ssq"
1022 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1023 (vec_merge:V4SF
1024 (vec_duplicate:V4SF
1025 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
1026 (match_operand:V4SF 1 "register_operand" "0,0")
1027 (const_int 1)))]
1028 "TARGET_SSE && TARGET_64BIT"
1029 "cvtsi2ssq\t{%2, %0|%0, %2}"
1030 [(set_attr "type" "sseicvt")
1031 (set_attr "athlon_decode" "vector,double")
1032 (set_attr "amdfam10_decode" "vector,double")
1033 (set_attr "mode" "SF")])
1034
;; CVTSS2SI: convert element 0 of the V4SF operand 1 to SI, modelled with
;; UNSPEC_FIX_NOTRUNC.  Alternatives: source in an SSE register or memory.
;; NOTE(review): unlike sse_cvtss2si_2 this pattern sets no
;; "amdfam10_decode" attribute; confirm whether that is intentional.
1035 (define_insn "sse_cvtss2si"
1036 [(set (match_operand:SI 0 "register_operand" "=r,r")
1037 (unspec:SI
1038 [(vec_select:SF
1039 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1040 (parallel [(const_int 0)]))]
1041 UNSPEC_FIX_NOTRUNC))]
1042 "TARGET_SSE"
1043 "cvtss2si\t{%1, %0|%0, %1}"
1044 [(set_attr "type" "sseicvt")
1045 (set_attr "athlon_decode" "double,vector")
1046 (set_attr "prefix_rep" "1")
1047 (set_attr "mode" "SI")])
1048
;; CVTSS2SI taking a plain SF operand (register or memory) instead of
;; element 0 of a V4SF; the conversion is modelled with UNSPEC_FIX_NOTRUNC.
1049 (define_insn "sse_cvtss2si_2"
1050 [(set (match_operand:SI 0 "register_operand" "=r,r")
1051 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1052 UNSPEC_FIX_NOTRUNC))]
1053 "TARGET_SSE"
1054 "cvtss2si\t{%1, %0|%0, %1}"
1055 [(set_attr "type" "sseicvt")
1056 (set_attr "athlon_decode" "double,vector")
1057 (set_attr "amdfam10_decode" "double,double")
1058 (set_attr "prefix_rep" "1")
1059 (set_attr "mode" "SI")])
1060
;; CVTSS2SIQ: 64-bit variant of sse_cvtss2si (requires TARGET_64BIT).
;; Converts element 0 of the V4SF operand 1 to DI via UNSPEC_FIX_NOTRUNC.
;; NOTE(review): unlike sse_cvtss2siq_2 this pattern sets no
;; "amdfam10_decode" attribute; confirm whether that is intentional.
1061 (define_insn "sse_cvtss2siq"
1062 [(set (match_operand:DI 0 "register_operand" "=r,r")
1063 (unspec:DI
1064 [(vec_select:SF
1065 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1066 (parallel [(const_int 0)]))]
1067 UNSPEC_FIX_NOTRUNC))]
1068 "TARGET_SSE && TARGET_64BIT"
1069 "cvtss2siq\t{%1, %0|%0, %1}"
1070 [(set_attr "type" "sseicvt")
1071 (set_attr "athlon_decode" "double,vector")
1072 (set_attr "prefix_rep" "1")
1073 (set_attr "mode" "DI")])
1074
;; CVTSS2SIQ taking a plain SF operand (register or memory); requires
;; TARGET_64BIT.  The conversion is modelled with UNSPEC_FIX_NOTRUNC.
1075 (define_insn "sse_cvtss2siq_2"
1076 [(set (match_operand:DI 0 "register_operand" "=r,r")
1077 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1078 UNSPEC_FIX_NOTRUNC))]
1079 "TARGET_SSE && TARGET_64BIT"
1080 "cvtss2siq\t{%1, %0|%0, %1}"
1081 [(set_attr "type" "sseicvt")
1082 (set_attr "athlon_decode" "double,vector")
1083 (set_attr "amdfam10_decode" "double,double")
1084 (set_attr "prefix_rep" "1")
1085 (set_attr "mode" "DI")])
1086
;; CVTTSS2SI: (fix ...) conversion of element 0 of the V4SF operand 1 to SI.
;; Alternatives: source in an SSE register or in memory.
1087 (define_insn "sse_cvttss2si"
1088 [(set (match_operand:SI 0 "register_operand" "=r,r")
1089 (fix:SI
1090 (vec_select:SF
1091 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1092 (parallel [(const_int 0)]))))]
1093 "TARGET_SSE"
1094 "cvttss2si\t{%1, %0|%0, %1}"
1095 [(set_attr "type" "sseicvt")
1096 (set_attr "athlon_decode" "double,vector")
1097 (set_attr "amdfam10_decode" "double,double")
1098 (set_attr "prefix_rep" "1")
1099 (set_attr "mode" "SI")])
1100
;; CVTTSS2SIQ: 64-bit variant of sse_cvttss2si (requires TARGET_64BIT).
;; (fix ...) conversion of element 0 of the V4SF operand 1 to DI.
1101 (define_insn "sse_cvttss2siq"
1102 [(set (match_operand:DI 0 "register_operand" "=r,r")
1103 (fix:DI
1104 (vec_select:SF
1105 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1106 (parallel [(const_int 0)]))))]
1107 "TARGET_SSE && TARGET_64BIT"
1108 "cvttss2siq\t{%1, %0|%0, %1}"
1109 [(set_attr "type" "sseicvt")
1110 (set_attr "athlon_decode" "double,vector")
1111 (set_attr "amdfam10_decode" "double,double")
1112 (set_attr "prefix_rep" "1")
1113 (set_attr "mode" "DI")])
1114
;; CVTDQ2PS: element-wise (float ...) conversion of a V4SI operand
;; (register or memory) to V4SF.  Requires TARGET_SSE2.
1115 (define_insn "sse2_cvtdq2ps"
1116 [(set (match_operand:V4SF 0 "register_operand" "=x")
1117 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1118 "TARGET_SSE2"
1119 "cvtdq2ps\t{%1, %0|%0, %1}"
1120 [(set_attr "type" "ssecvt")
1121 (set_attr "mode" "V4SF")])
1122
;; CVTPS2DQ: V4SF -> V4SI conversion modelled with UNSPEC_FIX_NOTRUNC.
;; Requires TARGET_SSE2; the encoding is flagged with prefix_data16.
1123 (define_insn "sse2_cvtps2dq"
1124 [(set (match_operand:V4SI 0 "register_operand" "=x")
1125 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1126 UNSPEC_FIX_NOTRUNC))]
1127 "TARGET_SSE2"
1128 "cvtps2dq\t{%1, %0|%0, %1}"
1129 [(set_attr "type" "ssecvt")
1130 (set_attr "prefix_data16" "1")
1131 (set_attr "mode" "TI")])
1132
;; CVTTPS2DQ: element-wise (fix ...) conversion of V4SF to V4SI.
;; Requires TARGET_SSE2; the encoding is flagged with prefix_rep.
1133 (define_insn "sse2_cvttps2dq"
1134 [(set (match_operand:V4SI 0 "register_operand" "=x")
1135 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1136 "TARGET_SSE2"
1137 "cvttps2dq\t{%1, %0|%0, %1}"
1138 [(set_attr "type" "ssecvt")
1139 (set_attr "prefix_rep" "1")
1140 (set_attr "mode" "TI")])
1141
1142 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1143 ;;
1144 ;; Parallel single-precision floating point element swizzling
1145 ;;
1146 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1147
;; Result = { op2[2], op2[3], op1[2], op1[3] } (indices 6,7,2,3 of the
;; op1:op2 concatenation); operand 1 is always tied to the destination.
;; Alternatives:
;;   0: register source          -> movhlps
;;   1: offsettable memory source -> movlps from %H2
;;      (presumably the high half of the memory operand; confirm against
;;      the i386 operand-printing code)
;;   2: memory destination        -> movhps store
;; The insn condition rejects operands 1 and 2 both being memory.
1148 (define_insn "sse_movhlps"
1149 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1150 (vec_select:V4SF
1151 (vec_concat:V8SF
1152 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1153 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1154 (parallel [(const_int 6)
1155 (const_int 7)
1156 (const_int 2)
1157 (const_int 3)])))]
1158 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1159 "@
1160 movhlps\t{%2, %0|%0, %2}
1161 movlps\t{%H2, %0|%0, %H2}
1162 movhps\t{%2, %0|%0, %2}"
1163 [(set_attr "type" "ssemov")
1164 (set_attr "mode" "V4SF,V2SF,V2SF")])
1165
;; Result = { op1[0], op1[1], op2[0], op2[1] } (indices 0,1,4,5 of the
;; op1:op2 concatenation); operand 1 is always tied to the destination.
;; Alternatives:
;;   0: register source              -> movlhps
;;   1: memory source                -> movhps load
;;   2: offsettable memory destination -> movlps store to %H0
;;      (presumably the high half of the destination; confirm against the
;;      i386 operand-printing code)
1166 (define_insn "sse_movlhps"
1167 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1168 (vec_select:V4SF
1169 (vec_concat:V8SF
1170 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1171 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1172 (parallel [(const_int 0)
1173 (const_int 1)
1174 (const_int 4)
1175 (const_int 5)])))]
1176 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1177 "@
1178 movlhps\t{%2, %0|%0, %2}
1179 movhps\t{%2, %0|%0, %2}
1180 movlps\t{%2, %H0|%H0, %2}"
1181 [(set_attr "type" "ssemov")
1182 (set_attr "mode" "V4SF,V2SF,V2SF")])
1183
;; UNPCKHPS: interleave the high halves of operands 1 and 2.
;; Result = { op1[2], op2[2], op1[3], op2[3] } (indices 2,6,3,7 of the
;; op1:op2 concatenation).  Operand 1 is tied to the destination.
1184 (define_insn "sse_unpckhps"
1185 [(set (match_operand:V4SF 0 "register_operand" "=x")
1186 (vec_select:V4SF
1187 (vec_concat:V8SF
1188 (match_operand:V4SF 1 "register_operand" "0")
1189 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1190 (parallel [(const_int 2) (const_int 6)
1191 (const_int 3) (const_int 7)])))]
1192 "TARGET_SSE"
1193 "unpckhps\t{%2, %0|%0, %2}"
1194 [(set_attr "type" "sselog")
1195 (set_attr "mode" "V4SF")])
1196
;; UNPCKLPS: interleave the low halves of operands 1 and 2.
;; Result = { op1[0], op2[0], op1[1], op2[1] } (indices 0,4,1,5 of the
;; op1:op2 concatenation).  Operand 1 is tied to the destination.
1197 (define_insn "sse_unpcklps"
1198 [(set (match_operand:V4SF 0 "register_operand" "=x")
1199 (vec_select:V4SF
1200 (vec_concat:V8SF
1201 (match_operand:V4SF 1 "register_operand" "0")
1202 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1203 (parallel [(const_int 0) (const_int 4)
1204 (const_int 1) (const_int 5)])))]
1205 "TARGET_SSE"
1206 "unpcklps\t{%2, %0|%0, %2}"
1207 [(set_attr "type" "sselog")
1208 (set_attr "mode" "V4SF")])
1209
1210 ;; These are modeled with the same vec_concat as the others so that we
1211 ;; capture users of shufps that can use the new instructions
;; MOVSHDUP: duplicate the odd-indexed elements of operand 1.
;; Result = { op1[1], op1[1], op1[3], op1[3] }; indices 7 refer to element
;; 3 of the second copy of operand 1 in the concatenation.  Requires SSE3.
1212 (define_insn "sse3_movshdup"
1213 [(set (match_operand:V4SF 0 "register_operand" "=x")
1214 (vec_select:V4SF
1215 (vec_concat:V8SF
1216 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1217 (match_dup 1))
1218 (parallel [(const_int 1)
1219 (const_int 1)
1220 (const_int 7)
1221 (const_int 7)])))]
1222 "TARGET_SSE3"
1223 "movshdup\t{%1, %0|%0, %1}"
1224 [(set_attr "type" "sse")
1225 (set_attr "prefix_rep" "1")
1226 (set_attr "mode" "V4SF")])
1227
;; MOVSLDUP: duplicate the even-indexed elements of operand 1.
;; Result = { op1[0], op1[0], op1[2], op1[2] }; indices 6 refer to element
;; 2 of the second copy of operand 1 in the concatenation.  Requires SSE3.
1228 (define_insn "sse3_movsldup"
1229 [(set (match_operand:V4SF 0 "register_operand" "=x")
1230 (vec_select:V4SF
1231 (vec_concat:V8SF
1232 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1233 (match_dup 1))
1234 (parallel [(const_int 0)
1235 (const_int 0)
1236 (const_int 6)
1237 (const_int 6)])))]
1238 "TARGET_SSE3"
1239 "movsldup\t{%1, %0|%0, %1}"
1240 [(set_attr "type" "sse")
1241 (set_attr "prefix_rep" "1")
1242 (set_attr "mode" "V4SF")])
1243
;; Expander for shufps with an 8-bit immediate in operand 3.  The mask is
;; split into four 2-bit selectors; the upper two have 4 added so that
;; they index the operand 2 half of the V8SF concatenation matched by
;; sse_shufps_1, which is the insn actually emitted.
1244 (define_expand "sse_shufps"
1245 [(match_operand:V4SF 0 "register_operand" "")
1246 (match_operand:V4SF 1 "register_operand" "")
1247 (match_operand:V4SF 2 "nonimmediate_operand" "")
1248 (match_operand:SI 3 "const_int_operand" "")]
1249 "TARGET_SSE"
1250 {
1251 int mask = INTVAL (operands[3]);
1252 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1253 GEN_INT ((mask >> 0) & 3),
1254 GEN_INT ((mask >> 2) & 3),
1255 GEN_INT (((mask >> 4) & 3) + 4),
1256 GEN_INT (((mask >> 6) & 3) + 4)));
1257 DONE;
1258 })
1259
;; SHUFPS expressed as a vec_select over the op1:op2 concatenation.
;; Operands 3-4 select from operand 1 (values 0..3) and operands 5-6 from
;; operand 2 (values 4..7).  The output code packs the four selectors back
;; into the 8-bit immediate (undoing the +4 bias on operands 5 and 6) and
;; overwrites operands[3] with it for printing.
1260 (define_insn "sse_shufps_1"
1261 [(set (match_operand:V4SF 0 "register_operand" "=x")
1262 (vec_select:V4SF
1263 (vec_concat:V8SF
1264 (match_operand:V4SF 1 "register_operand" "0")
1265 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1266 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1267 (match_operand 4 "const_0_to_3_operand" "")
1268 (match_operand 5 "const_4_to_7_operand" "")
1269 (match_operand 6 "const_4_to_7_operand" "")])))]
1270 "TARGET_SSE"
1271 {
1272 int mask = 0;
1273 mask |= INTVAL (operands[3]) << 0;
1274 mask |= INTVAL (operands[4]) << 2;
1275 mask |= (INTVAL (operands[5]) - 4) << 4;
1276 mask |= (INTVAL (operands[6]) - 4) << 6;
1277 operands[3] = GEN_INT (mask);
1278
1279 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1280 }
1281 [(set_attr "type" "sselog")
1282 (set_attr "mode" "V4SF")])
1283
;; Extract elements 2 and 3 of the V4SF operand 1 into a V2SF destination.
;; Alternatives:
;;   0: register -> memory               (movhps store)
;;   1: register -> register             (movhlps)
;;   2: offsettable memory -> register   (movlps from %H1)
1284 (define_insn "sse_storehps"
1285 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1286 (vec_select:V2SF
1287 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1288 (parallel [(const_int 2) (const_int 3)])))]
1289 "TARGET_SSE"
1290 "@
1291 movhps\t{%1, %0|%0, %1}
1292 movhlps\t{%1, %0|%0, %1}
1293 movlps\t{%H1, %0|%0, %H1}"
1294 [(set_attr "type" "ssemov")
1295 (set_attr "mode" "V2SF,V4SF,V2SF")])
1296
;; Replace the high half of operand 1 with the V2SF operand 2:
;; result = { op1[0], op1[1], op2[0], op2[1] }.  Operand 1 is tied to the
;; destination.  Alternatives:
;;   0: memory source                  (movhps load)
;;   1: register source                (movlhps)
;;   2: offsettable memory destination (movlps store to %H0)
1297 (define_insn "sse_loadhps"
1298 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1299 (vec_concat:V4SF
1300 (vec_select:V2SF
1301 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1302 (parallel [(const_int 0) (const_int 1)]))
1303 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1304 "TARGET_SSE"
1305 "@
1306 movhps\t{%2, %0|%0, %2}
1307 movlhps\t{%2, %0|%0, %2}
1308 movlps\t{%2, %H0|%H0, %2}"
1309 [(set_attr "type" "ssemov")
1310 (set_attr "mode" "V2SF,V4SF,V2SF")])
1311
;; Extract elements 0 and 1 of the V4SF operand 1 into a V2SF destination.
;; Alternatives:
;;   0: register -> memory   (movlps store)
;;   1: register -> register (movaps, copies the whole register)
;;   2: memory -> register   (movlps load)
1312 (define_insn "sse_storelps"
1313 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1314 (vec_select:V2SF
1315 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1316 (parallel [(const_int 0) (const_int 1)])))]
1317 "TARGET_SSE"
1318 "@
1319 movlps\t{%1, %0|%0, %1}
1320 movaps\t{%1, %0|%0, %1}
1321 movlps\t{%1, %0|%0, %1}"
1322 [(set_attr "type" "ssemov")
1323 (set_attr "mode" "V2SF,V4SF,V2SF")])
1324
;; Replace the low half of operand 1 with the V2SF operand 2:
;; result = { op2[0], op2[1], op1[2], op1[3] }.  Alternatives:
;;   0: operand 2 tied to the destination, operand 1 in a register;
;;      shufps with immediate 0xe4 pulls elements 2-3 from operand 1
;;   1: operand 1 tied to the destination, operand 2 in memory (movlps load)
;;   2: memory destination tied to operand 1, operand 2 in a register
;;      (movlps store)
1325 (define_insn "sse_loadlps"
1326 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1327 (vec_concat:V4SF
1328 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1329 (vec_select:V2SF
1330 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1331 (parallel [(const_int 2) (const_int 3)]))))]
1332 "TARGET_SSE"
1333 "@
1334 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1335 movlps\t{%2, %0|%0, %2}
1336 movlps\t{%2, %0|%0, %2}"
1337 [(set_attr "type" "sselog,ssemov,ssemov")
1338 (set_attr "mode" "V4SF,V2SF,V2SF")])
1339
;; MOVSS register form: element 0 comes from operand 2, the remaining
;; elements from operand 1 (vec_merge mask 1).  Operand 1 is tied to the
;; destination.
1340 (define_insn "sse_movss"
1341 [(set (match_operand:V4SF 0 "register_operand" "=x")
1342 (vec_merge:V4SF
1343 (match_operand:V4SF 2 "register_operand" "x")
1344 (match_operand:V4SF 1 "register_operand" "0")
1345 (const_int 1)))]
1346 "TARGET_SSE"
1347 "movss\t{%2, %0|%0, %2}"
1348 [(set_attr "type" "ssemov")
1349 (set_attr "mode" "SF")])
1350
;; Broadcast the SF operand 1 to all four V4SF elements.  The source is
;; tied to the destination register, so a single shufps with immediate 0
;; on that register performs the splat.
1351 (define_insn "*vec_dupv4sf"
1352 [(set (match_operand:V4SF 0 "register_operand" "=x")
1353 (vec_duplicate:V4SF
1354 (match_operand:SF 1 "register_operand" "0")))]
1355 "TARGET_SSE"
1356 "shufps\t{$0, %0, %0|%0, %0, 0}"
1357 [(set_attr "type" "sselog1")
1358 (set_attr "mode" "V4SF")])
1359
1360 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1361 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1362 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF values (operand 1 low, operand 2 high).
;; Alternatives:
;;   0: both in SSE registers, op1 tied       -> unpcklps
;;   1: op1 in memory, op2 constant "C"       -> movss load
;;   2: both in "y" registers, op1 tied       -> punpckldq
;;   3: op1 in memory, op2 constant "C"       -> movd load
1363 (define_insn "*sse_concatv2sf"
1364 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1365 (vec_concat:V2SF
1366 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1367 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1368 "TARGET_SSE"
1369 "@
1370 unpcklps\t{%2, %0|%0, %2}
1371 movss\t{%1, %0|%0, %1}
1372 punpckldq\t{%2, %0|%0, %2}
1373 movd\t{%1, %0|%0, %1}"
1374 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1375 (set_attr "mode" "V4SF,SF,DI,DI")])
1376
;; Build a V4SF from two V2SF halves (operand 1 low, operand 2 high).
;; Operand 1 is tied to the destination.  Alternatives:
;;   0: operand 2 in a register -> movlhps
;;   1: operand 2 in memory     -> movhps load
1377 (define_insn "*sse_concatv4sf"
1378 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1379 (vec_concat:V4SF
1380 (match_operand:V2SF 1 "register_operand" " 0,0")
1381 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1382 "TARGET_SSE"
1383 "@
1384 movlhps\t{%2, %0|%0, %2}
1385 movhps\t{%2, %0|%0, %2}"
1386 [(set_attr "type" "ssemov")
1387 (set_attr "mode" "V4SF,V2SF")])
1388
;; Expander for V4SF vector initialisation.  All work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument,
;; the destination, and the initialiser in operand 1.
1389 (define_expand "vec_initv4sf"
1390 [(match_operand:V4SF 0 "register_operand" "")
1391 (match_operand 1 "" "")]
1392 "TARGET_SSE"
1393 {
1394 ix86_expand_vector_init (false, operands[0], operands[1]);
1395 DONE;
1396 })
1397
;; Set element 0 of a V4SF to the SF operand 2 (vec_merge mask 1); the
;; other elements come from operand 1.  Alternatives:
;;   0: op1 tied to destination, op2 in an SSE register -> movss
;;   1: op1 is constant "C", op2 in memory              -> movss load
;;   2: op1 is constant "C", op2 in a general register  -> movd
;;   3: memory destination tied to op1                  -> "#", i.e. the
;;      insn has no direct output and must be split
1398 (define_insn "vec_setv4sf_0"
1399 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Yt,m")
1400 (vec_merge:V4SF
1401 (vec_duplicate:V4SF
1402 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1403 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1404 (const_int 1)))]
1405 "TARGET_SSE"
1406 "@
1407 movss\t{%2, %0|%0, %2}
1408 movss\t{%2, %0|%0, %2}
1409 movd\t{%2, %0|%0, %2}
1410 #"
1411 [(set_attr "type" "ssemov")
1412 (set_attr "mode" "SF")])
1413
1414 ;; A subset is vec_setv4sf.
;; SSE4.1 element insert via INSERTPS.  Operand 3 is the vec_merge mask
;; matched by const_pow2_1_to_8_operand; the output code converts it to an
;; element index with exact_log2 and shifts that left by 4 to form the
;; immediate printed with the instruction.
1415 (define_insn "*vec_setv4sf_sse4_1"
1416 [(set (match_operand:V4SF 0 "register_operand" "=x")
1417 (vec_merge:V4SF
1418 (vec_duplicate:V4SF
1419 (match_operand:SF 2 "nonimmediate_operand" "xm"))
1420 (match_operand:V4SF 1 "register_operand" "0")
1421 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
1422 "TARGET_SSE4_1"
1423 {
1424 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
1425 return "insertps\t{%3, %2, %0|%0, %2, %3}";
1426 }
1427 [(set_attr "type" "sselog")
1428 (set_attr "prefix_extra" "1")
1429 (set_attr "mode" "V4SF")])
1430
;; INSERTPS with an explicit 8-bit immediate (operand 3), modelled as
;; UNSPEC_INSERTPS.  Operand 1 is tied to the destination; operand 2 is
;; the source register.  Requires TARGET_SSE4_1.
;; The stray `;' that used to follow the output template (harmless only
;; because it starts a comment) has been removed.
1431 (define_insn "sse4_1_insertps"
1432 [(set (match_operand:V4SF 0 "register_operand" "=x")
1433 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
1434 (match_operand:V4SF 1 "register_operand" "0")
1435 (match_operand:SI 3 "const_0_to_255_operand" "n")]
1436 UNSPEC_INSERTPS))]
1437 "TARGET_SSE4_1"
1438 "insertps\t{%3, %2, %0|%0, %2, %3}"
1439 [(set_attr "type" "sselog")
1440 (set_attr "prefix_extra" "1")
1441 (set_attr "mode" "V4SF")])
1442
;; After reload, split "set element 0 of a V4SF in memory, keeping the
;; other elements" (merge of operand 1 with the same memory, mask 1) into
;; a plain SFmode move to offset 0 of that memory operand.
1443 (define_split
1444 [(set (match_operand:V4SF 0 "memory_operand" "")
1445 (vec_merge:V4SF
1446 (vec_duplicate:V4SF
1447 (match_operand:SF 1 "nonmemory_operand" ""))
1448 (match_dup 0)
1449 (const_int 1)))]
1450 "TARGET_SSE && reload_completed"
1451 [(const_int 0)]
1452 {
1453 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
1454 DONE;
1455 })
1456
;; Expander for setting one element of a V4SF.  Operand 2 is a constant
;; element index; the work is delegated to ix86_expand_vector_set, called
;; with `false' as its first argument.
1457 (define_expand "vec_setv4sf"
1458 [(match_operand:V4SF 0 "register_operand" "")
1459 (match_operand:SF 1 "register_operand" "")
1460 (match_operand 2 "const_int_operand" "")]
1461 "TARGET_SSE"
1462 {
1463 ix86_expand_vector_set (false, operands[0], operands[1],
1464 INTVAL (operands[2]));
1465 DONE;
1466 })
1467
;; Extract element 0 of a V4SF.  The insn itself emits nothing ("#") and
;; is split after reload into an ordinary SFmode move: a register source
;; is re-expressed as an SFmode REG with the same register number, any
;; other source goes through gen_lowpart.  The insn condition rejects
;; memory-to-memory.
1468 (define_insn_and_split "*vec_extractv4sf_0"
1469 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1470 (vec_select:SF
1471 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1472 (parallel [(const_int 0)])))]
1473 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1474 "#"
1475 "&& reload_completed"
1476 [(const_int 0)]
1477 {
1478 rtx op1 = operands[1];
1479 if (REG_P (op1))
1480 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1481 else
1482 op1 = gen_lowpart (SFmode, op1);
1483 emit_move_insn (operands[0], op1);
1484 DONE;
1485 })
1486
;; EXTRACTPS: copy element <operand 2> (0..3) of the V4SF operand 1 to a
;; general register or to memory.  Requires TARGET_SSE4_1.
;; The destination predicate is nonimmediate_operand so that it agrees
;; with the "=rm" constraint; with register_operand the memory alternative
;; could never be matched directly by the predicate.
1487 (define_insn "*sse4_1_extractps"
1488 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
1489 (vec_select:SF
1490 (match_operand:V4SF 1 "register_operand" "x")
1491 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
1492 "TARGET_SSE4_1"
1493 "extractps\t{%2, %1, %0|%0, %1, %2}"
1494 [(set_attr "type" "sselog")
1495 (set_attr "prefix_extra" "1")
1496 (set_attr "mode" "V4SF")])
1497
;; Expander for extracting one element of a V4SF.  Operand 2 is a constant
;; element index; the work is delegated to ix86_expand_vector_extract,
;; called with `false' as its first argument.
1498 (define_expand "vec_extractv4sf"
1499 [(match_operand:SF 0 "register_operand" "")
1500 (match_operand:V4SF 1 "register_operand" "")
1501 (match_operand 2 "const_int_operand" "")]
1502 "TARGET_SSE"
1503 {
1504 ix86_expand_vector_extract (false, operands[0], operands[1],
1505 INTVAL (operands[2]));
1506 DONE;
1507 })
1508
1509 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1510 ;;
1511 ;; Parallel double-precision floating point arithmetic
1512 ;;
1513 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1514
;; Expander for V2DF negation; fully handled by
;; ix86_expand_fp_absneg_operator (NEG, ...).  Requires TARGET_SSE2.
1515 (define_expand "negv2df2"
1516 [(set (match_operand:V2DF 0 "register_operand" "")
1517 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1518 "TARGET_SSE2"
1519 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
1520
;; Expander for V2DF absolute value; fully handled by
;; ix86_expand_fp_absneg_operator (ABS, ...).  Requires TARGET_SSE2.
1521 (define_expand "absv2df2"
1522 [(set (match_operand:V2DF 0 "register_operand" "")
1523 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1524 "TARGET_SSE2"
1525 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
1526
;; Expander for V2DF addition.  The preparation statement passes the
;; operands through ix86_fixup_binary_operands_no_copy before the
;; (plus ...) set is emitted.  Requires TARGET_SSE2.
1527 (define_expand "addv2df3"
1528 [(set (match_operand:V2DF 0 "register_operand" "")
1529 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1530 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1531 "TARGET_SSE2"
1532 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
1533
;; ADDPD.  Operand 1 is commutative with operand 2 ("%0") and tied to the
;; destination; ix86_binary_operator_ok must accept the operands.
1534 (define_insn "*addv2df3"
1535 [(set (match_operand:V2DF 0 "register_operand" "=x")
1536 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1537 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1538 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1539 "addpd\t{%2, %0|%0, %2}"
1540 [(set_attr "type" "sseadd")
1541 (set_attr "mode" "V2DF")])
1542
;; ADDSD in vector form: element 0 = op1[0] + op2[0], element 1 is copied
;; from operand 1 (vec_merge mask 1).  Operand 1 is tied to the destination.
;; The insn condition passes V2DFmode to ix86_binary_operator_ok, matching
;; the mode of this pattern (it previously named V4SFmode, a leftover from
;; the single-precision counterpart).
1543 (define_insn "sse2_vmaddv2df3"
1544 [(set (match_operand:V2DF 0 "register_operand" "=x")
1545 (vec_merge:V2DF
1546 (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1547 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1548 (match_dup 1)
1549 (const_int 1)))]
1550 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1551 "addsd\t{%2, %0|%0, %2}"
1552 [(set_attr "type" "sseadd")
1553 (set_attr "mode" "DF")])
1554
;; Expander for V2DF subtraction.  The preparation statement passes the
;; operands through ix86_fixup_binary_operands_no_copy before the
;; (minus ...) set is emitted.  Requires TARGET_SSE2.
1555 (define_expand "subv2df3"
1556 [(set (match_operand:V2DF 0 "register_operand" "")
1557 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1558 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1559 "TARGET_SSE2"
1560 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
1561
;; SUBPD: operand 0 = operand 1 - operand 2.  Not commutative, so operand 1
;; must be a register tied to the destination.
1562 (define_insn "*subv2df3"
1563 [(set (match_operand:V2DF 0 "register_operand" "=x")
1564 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1565 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1566 "TARGET_SSE2"
1567 "subpd\t{%2, %0|%0, %2}"
1568 [(set_attr "type" "sseadd")
1569 (set_attr "mode" "V2DF")])
1570
;; SUBSD in vector form: element 0 = op1[0] - op2[0], element 1 is copied
;; from operand 1 (vec_merge mask 1).  Operand 1 is tied to the destination.
1571 (define_insn "sse2_vmsubv2df3"
1572 [(set (match_operand:V2DF 0 "register_operand" "=x")
1573 (vec_merge:V2DF
1574 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1575 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1576 (match_dup 1)
1577 (const_int 1)))]
1578 "TARGET_SSE2"
1579 "subsd\t{%2, %0|%0, %2}"
1580 [(set_attr "type" "sseadd")
1581 (set_attr "mode" "DF")])
1582
;; Expander for V2DF multiplication.  The preparation statement passes the
;; operands through ix86_fixup_binary_operands_no_copy before the
;; (mult ...) set is emitted.  Requires TARGET_SSE2.
1583 (define_expand "mulv2df3"
1584 [(set (match_operand:V2DF 0 "register_operand" "")
1585 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1586 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1587 "TARGET_SSE2"
1588 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
1589
;; MULPD.  Operand 1 is commutative with operand 2 ("%0") and tied to the
;; destination; ix86_binary_operator_ok must accept the operands.
1590 (define_insn "*mulv2df3"
1591 [(set (match_operand:V2DF 0 "register_operand" "=x")
1592 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1593 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1594 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1595 "mulpd\t{%2, %0|%0, %2}"
1596 [(set_attr "type" "ssemul")
1597 (set_attr "mode" "V2DF")])
1598
;; MULSD in vector form: element 0 = op1[0] * op2[0], element 1 is copied
;; from operand 1 (vec_merge mask 1).  Operand 1 is tied to the destination.
1599 (define_insn "sse2_vmmulv2df3"
1600 [(set (match_operand:V2DF 0 "register_operand" "=x")
1601 (vec_merge:V2DF
1602 (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
1603 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1604 (match_dup 1)
1605 (const_int 1)))]
1606 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1607 "mulsd\t{%2, %0|%0, %2}"
1608 [(set_attr "type" "ssemul")
1609 (set_attr "mode" "DF")])
1610
;; Expander for V2DF division.  Operand 1 must already be a register; the
;; preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy.  Requires TARGET_SSE2.
1611 (define_expand "divv2df3"
1612 [(set (match_operand:V2DF 0 "register_operand" "")
1613 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1614 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1615 "TARGET_SSE2"
1616 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
1617
;; DIVPD: operand 0 = operand 1 / operand 2.  Not commutative, so operand 1
;; must be a register tied to the destination.
1618 (define_insn "*divv2df3"
1619 [(set (match_operand:V2DF 0 "register_operand" "=x")
1620 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1621 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1622 "TARGET_SSE2"
1623 "divpd\t{%2, %0|%0, %2}"
1624 [(set_attr "type" "ssediv")
1625 (set_attr "mode" "V2DF")])
1626
;; DIVSD in vector form: element 0 = op1[0] / op2[0], element 1 is copied
;; from operand 1 (vec_merge mask 1).  Operand 1 is tied to the destination.
1627 (define_insn "sse2_vmdivv2df3"
1628 [(set (match_operand:V2DF 0 "register_operand" "=x")
1629 (vec_merge:V2DF
1630 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1631 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1632 (match_dup 1)
1633 (const_int 1)))]
1634 "TARGET_SSE2"
1635 "divsd\t{%2, %0|%0, %2}"
1636 [(set_attr "type" "ssediv")
1637 (set_attr "mode" "DF")])
1638
;; SQRTPD: element-wise square root of a V2DF operand (register or
;; memory).  Requires TARGET_SSE2.
1639 (define_insn "sqrtv2df2"
1640 [(set (match_operand:V2DF 0 "register_operand" "=x")
1641 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1642 "TARGET_SSE2"
1643 "sqrtpd\t{%1, %0|%0, %1}"
1644 [(set_attr "type" "sse")
1645 (set_attr "mode" "V2DF")])
1646
;; SQRTSD in vector form: element 0 = sqrt (op1[0]), element 1 is copied
;; from operand 2 (vec_merge mask 1), which is tied to the destination.
;; Operand 1 uses nonimmediate_operand so the predicate agrees with its
;; "xm" constraint; with register_operand a memory source could never be
;; matched directly by the predicate.
1647 (define_insn "sse2_vmsqrtv2df2"
1648 [(set (match_operand:V2DF 0 "register_operand" "=x")
1649 (vec_merge:V2DF
1650 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1651 (match_operand:V2DF 2 "register_operand" "0")
1652 (const_int 1)))]
1653 "TARGET_SSE2"
1654 "sqrtsd\t{%1, %0|%0, %1}"
1655 [(set_attr "type" "sse")
1656 (set_attr "mode" "DF")])
1657
1658 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1659 ;; isn't really correct, as those rtl operators aren't defined when
1660 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
1661
;; Expander for V2DF maximum.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then passed
;; through ix86_fixup_binary_operands_no_copy.
1662 (define_expand "smaxv2df3"
1663 [(set (match_operand:V2DF 0 "register_operand" "")
1664 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1665 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1666 "TARGET_SSE2"
1667 {
1668 if (!flag_finite_math_only)
1669 operands[1] = force_reg (V2DFmode, operands[1]);
1670 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
1671 })
1672
;; MAXPD, commutative form ("%0" on operand 1).  Only matches when
;; flag_finite_math_only is set and ix86_binary_operator_ok accepts the
;; operands.
1673 (define_insn "*smaxv2df3_finite"
1674 [(set (match_operand:V2DF 0 "register_operand" "=x")
1675 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1676 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1677 "TARGET_SSE2 && flag_finite_math_only
1678 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1679 "maxpd\t{%2, %0|%0, %2}"
1680 [(set_attr "type" "sseadd")
1681 (set_attr "mode" "V2DF")])
1682
;; MAXPD, non-commutative form: operand 1 must be a register tied to the
;; destination and the operand order is preserved.  No dependence on
;; flag_finite_math_only.
1683 (define_insn "*smaxv2df3"
1684 [(set (match_operand:V2DF 0 "register_operand" "=x")
1685 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1686 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1687 "TARGET_SSE2"
1688 "maxpd\t{%2, %0|%0, %2}"
1689 [(set_attr "type" "sseadd")
1690 (set_attr "mode" "V2DF")])
1691
;; MAXSD in vector form: element 0 = smax (op1[0], op2[0]), element 1 is
;; copied from operand 1 (vec_merge mask 1).  Operand 1 is tied to the
;; destination.
1692 (define_insn "sse2_vmsmaxv2df3"
1693 [(set (match_operand:V2DF 0 "register_operand" "=x")
1694 (vec_merge:V2DF
1695 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1696 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1697 (match_dup 1)
1698 (const_int 1)))]
1699 "TARGET_SSE2"
1700 "maxsd\t{%2, %0|%0, %2}"
1701 [(set_attr "type" "sseadd")
1702 (set_attr "mode" "DF")])
1703
;; Expander for V2DF minimum.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then passed
;; through ix86_fixup_binary_operands_no_copy.
1704 (define_expand "sminv2df3"
1705 [(set (match_operand:V2DF 0 "register_operand" "")
1706 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1707 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1708 "TARGET_SSE2"
1709 {
1710 if (!flag_finite_math_only)
1711 operands[1] = force_reg (V2DFmode, operands[1]);
1712 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
1713 })
1714
;; MINPD, commutative form ("%0" on operand 1).  Only matches when
;; flag_finite_math_only is set and ix86_binary_operator_ok accepts the
;; operands.
1715 (define_insn "*sminv2df3_finite"
1716 [(set (match_operand:V2DF 0 "register_operand" "=x")
1717 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1718 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1719 "TARGET_SSE2 && flag_finite_math_only
1720 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1721 "minpd\t{%2, %0|%0, %2}"
1722 [(set_attr "type" "sseadd")
1723 (set_attr "mode" "V2DF")])
1724
;; MINPD, non-commutative form: operand 1 must be a register tied to the
;; destination and the operand order is preserved.  No dependence on
;; flag_finite_math_only.
1725 (define_insn "*sminv2df3"
1726 [(set (match_operand:V2DF 0 "register_operand" "=x")
1727 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1728 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1729 "TARGET_SSE2"
1730 "minpd\t{%2, %0|%0, %2}"
1731 [(set_attr "type" "sseadd")
1732 (set_attr "mode" "V2DF")])
1733
;; MINSD in vector form: element 0 = smin (op1[0], op2[0]), element 1 is
;; copied from operand 1 (vec_merge mask 1).  Operand 1 is tied to the
;; destination.
1734 (define_insn "sse2_vmsminv2df3"
1735 [(set (match_operand:V2DF 0 "register_operand" "=x")
1736 (vec_merge:V2DF
1737 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1738 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1739 (match_dup 1)
1740 (const_int 1)))]
1741 "TARGET_SSE2"
1742 "minsd\t{%2, %0|%0, %2}"
1743 [(set_attr "type" "sseadd")
1744 (set_attr "mode" "DF")])
1745
;; ADDSUBPD: element 0 = op1[0] - op2[0], element 1 = op1[1] + op2[1].
;; In vec_merge a set mask bit selects the element from the first vector
;; (here the sum), bit 0 being element 0.  The mask is therefore 2, so that
;; only element 1 takes the sum and element 0 takes the difference; a mask
;; of 1 would describe the opposite of what the instruction computes.
;; Operand 1 is tied to the destination.  Requires TARGET_SSE3.
1746 (define_insn "sse3_addsubv2df3"
1747 [(set (match_operand:V2DF 0 "register_operand" "=x")
1748 (vec_merge:V2DF
1749 (plus:V2DF
1750 (match_operand:V2DF 1 "register_operand" "0")
1751 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1752 (minus:V2DF (match_dup 1) (match_dup 2))
1753 (const_int 2)))]
1754 "TARGET_SSE3"
1755 "addsubpd\t{%2, %0|%0, %2}"
1756 [(set_attr "type" "sseadd")
1757 (set_attr "mode" "V2DF")])
1758
;; HADDPD: result = { op1[0] + op1[1], op2[0] + op2[1] }.
;; Operand 1 is tied to the destination.  Requires TARGET_SSE3.
1759 (define_insn "sse3_haddv2df3"
1760 [(set (match_operand:V2DF 0 "register_operand" "=x")
1761 (vec_concat:V2DF
1762 (plus:DF
1763 (vec_select:DF
1764 (match_operand:V2DF 1 "register_operand" "0")
1765 (parallel [(const_int 0)]))
1766 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1767 (plus:DF
1768 (vec_select:DF
1769 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1770 (parallel [(const_int 0)]))
1771 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1772 "TARGET_SSE3"
1773 "haddpd\t{%2, %0|%0, %2}"
1774 [(set_attr "type" "sseadd")
1775 (set_attr "mode" "V2DF")])
1776
;; HSUBPD: result = { op1[0] - op1[1], op2[0] - op2[1] }.
;; Operand 1 is tied to the destination.  Requires TARGET_SSE3.
1777 (define_insn "sse3_hsubv2df3"
1778 [(set (match_operand:V2DF 0 "register_operand" "=x")
1779 (vec_concat:V2DF
1780 (minus:DF
1781 (vec_select:DF
1782 (match_operand:V2DF 1 "register_operand" "0")
1783 (parallel [(const_int 0)]))
1784 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1785 (minus:DF
1786 (vec_select:DF
1787 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1788 (parallel [(const_int 0)]))
1789 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1790 "TARGET_SSE3"
1791 "hsubpd\t{%2, %0|%0, %2}"
1792 [(set_attr "type" "sseadd")
1793 (set_attr "mode" "V2DF")])
1794
;; Sum reduction of a V2DF: emits sse3_haddv2df3 with operand 1 used as
;; both sources, so every result element is op1[0] + op1[1].
;; Requires TARGET_SSE3.
1795 (define_expand "reduc_splus_v2df"
1796 [(match_operand:V2DF 0 "register_operand" "")
1797 (match_operand:V2DF 1 "register_operand" "")]
1798 "TARGET_SSE3"
1799 {
1800 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1801 DONE;
1802 })
1803
1804 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1805 ;;
1806 ;; Parallel double-precision floating point comparisons
1807 ;;
1808 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1809
;; Packed double compare producing a mask: CMP<cond>PD, where the condition
;; suffix is printed from the comparison operator (operand 3) via %D3.
;; Operand 1 is tied to the destination.
1810 (define_insn "sse2_maskcmpv2df3"
1811 [(set (match_operand:V2DF 0 "register_operand" "=x")
1812 (match_operator:V2DF 3 "sse_comparison_operator"
1813 [(match_operand:V2DF 1 "register_operand" "0")
1814 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1815 "TARGET_SSE2"
1816 "cmp%D3pd\t{%2, %0|%0, %2}"
1817 [(set_attr "type" "ssecmp")
1818 (set_attr "mode" "V2DF")])
1819
;; Scalar double compare producing a mask: CMP<cond>SD on DF operands, with
;; the condition suffix printed from operand 3 via %D3.  Operand 1 is tied
;; to the destination.
1820 (define_insn "sse2_maskcmpdf3"
1821 [(set (match_operand:DF 0 "register_operand" "=x")
1822 (match_operator:DF 3 "sse_comparison_operator"
1823 [(match_operand:DF 1 "register_operand" "0")
1824 (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
1825 "TARGET_SSE2"
1826 "cmp%D3sd\t{%2, %0|%0, %2}"
1827 [(set_attr "type" "ssecmp")
1828 (set_attr "mode" "DF")])
1829
;; CMP<cond>SD in vector form: element 0 is the comparison result,
;; element 1 is copied from operand 1 (vec_merge mask 1).  Operand 1 is
;; tied to the destination.
1830 (define_insn "sse2_vmmaskcmpv2df3"
1831 [(set (match_operand:V2DF 0 "register_operand" "=x")
1832 (vec_merge:V2DF
1833 (match_operator:V2DF 3 "sse_comparison_operator"
1834 [(match_operand:V2DF 1 "register_operand" "0")
1835 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1836 (match_dup 1)
1837 (const_int 1)))]
1838 "TARGET_SSE2"
1839 "cmp%D3sd\t{%2, %0|%0, %2}"
1840 [(set_attr "type" "ssecmp")
1841 (set_attr "mode" "DF")])
1842
;; Compare element 0 of V2DF operand 0 with element 0 of V2DF operand 1 and
;; set FLAGS_REG in CCFP mode.  Emits comisd.
1843 (define_insn "sse2_comi"
1844 [(set (reg:CCFP FLAGS_REG)
1845 (compare:CCFP
1846 (vec_select:DF
1847 (match_operand:V2DF 0 "register_operand" "x")
1848 (parallel [(const_int 0)]))
1849 (vec_select:DF
1850 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1851 (parallel [(const_int 0)]))))]
1852 "TARGET_SSE2"
1853 "comisd\t{%1, %0|%0, %1}"
1854 [(set_attr "type" "ssecomi")
1855 (set_attr "mode" "DF")])
1856
;; Same comparison as sse2_comi, but the flags are set in CCFPU mode and the
;; emitted instruction is ucomisd.
1857 (define_insn "sse2_ucomi"
1858 [(set (reg:CCFPU FLAGS_REG)
1859 (compare:CCFPU
1860 (vec_select:DF
1861 (match_operand:V2DF 0 "register_operand" "x")
1862 (parallel [(const_int 0)]))
1863 (vec_select:DF
1864 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1865 (parallel [(const_int 0)]))))]
1866 "TARGET_SSE2"
1867 "ucomisd\t{%1, %0|%0, %1}"
1868 [(set_attr "type" "ssecomi")
1869 (set_attr "mode" "DF")])
1870
;; Vector conditional select on V2DF: operand 0 = (operand 4 <op3> operand 5)
;; ? operand 1 : operand 2.  All the work is delegated to
;; ix86_expand_fp_vcond; the expansion fails when that helper returns false.
(define_expand "vcondv2df"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (if_then_else:V2DF
          (match_operator 3 ""
            [(match_operand:V2DF 4 "nonimmediate_operand" "")
             (match_operand:V2DF 5 "nonimmediate_operand" "")])
          (match_operand:V2DF 1 "general_operand" "")
          (match_operand:V2DF 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (!ix86_expand_fp_vcond (operands))
    FAIL;
  DONE;
})
1886
1887 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1888 ;;
1889 ;; Parallel double-precision floating point logical operations
1890 ;;
1891 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1892
;; Named expander for V2DF bitwise AND.  The preparation statement passes the
;; operands through ix86_fixup_binary_operands_no_copy before the pattern is
;; emitted.
1893 (define_expand "andv2df3"
1894 [(set (match_operand:V2DF 0 "register_operand" "")
1895 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1896 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1897 "TARGET_SSE2"
1898 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
1899
;; V2DF bitwise AND insn (andpd).  Operands 1 and 2 are commutative ("%");
;; the insn condition additionally requires ix86_binary_operator_ok.
1900 (define_insn "*andv2df3"
1901 [(set (match_operand:V2DF 0 "register_operand" "=x")
1902 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1903 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1904 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1905 "andpd\t{%2, %0|%0, %2}"
1906 [(set_attr "type" "sselog")
1907 (set_attr "mode" "V2DF")])
1908
;; V2DF and-not: (~operand 1) & operand 2, emitted as andnpd.  Operand 1 is
;; the complemented input and is tied to the destination, so the operation
;; is not commutative.
1909 (define_insn "sse2_nandv2df3"
1910 [(set (match_operand:V2DF 0 "register_operand" "=x")
1911 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1912 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1913 "TARGET_SSE2"
1914 "andnpd\t{%2, %0|%0, %2}"
1915 [(set_attr "type" "sselog")
1916 (set_attr "mode" "V2DF")])
1917
;; Named expander for V2DF bitwise inclusive OR.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1918 (define_expand "iorv2df3"
1919 [(set (match_operand:V2DF 0 "register_operand" "")
1920 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1921 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1922 "TARGET_SSE2"
1923 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
1924
;; V2DF bitwise inclusive OR insn (orpd); commutative in operands 1 and 2 and
;; gated on ix86_binary_operator_ok.
1925 (define_insn "*iorv2df3"
1926 [(set (match_operand:V2DF 0 "register_operand" "=x")
1927 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1928 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1929 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1930 "orpd\t{%2, %0|%0, %2}"
1931 [(set_attr "type" "sselog")
1932 (set_attr "mode" "V2DF")])
1933
;; Named expander for V2DF bitwise exclusive OR.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1934 (define_expand "xorv2df3"
1935 [(set (match_operand:V2DF 0 "register_operand" "")
1936 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1937 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1938 "TARGET_SSE2"
1939 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
1940
;; V2DF bitwise exclusive OR insn (xorpd); commutative in operands 1 and 2
;; and gated on ix86_binary_operator_ok.
1941 (define_insn "*xorv2df3"
1942 [(set (match_operand:V2DF 0 "register_operand" "=x")
1943 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1944 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1945 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1946 "xorpd\t{%2, %0|%0, %2}"
1947 [(set_attr "type" "sselog")
1948 (set_attr "mode" "V2DF")])
1949
1950 ;; Also define scalar versions. These are used for abs, neg, and
1951 ;; conditional move. Using subregs into vector modes causes register
1952 ;; allocation lossage. These patterns do not allow memory operands
1953 ;; because the native instructions read the full 128-bits.
1954
;; Scalar DF bitwise AND implemented with the packed andpd instruction.
;; Both inputs must be SSE registers; the "mode" attribute is V2DF because
;; the emitted instruction is the full-width form.
1955 (define_insn "*anddf3"
1956 [(set (match_operand:DF 0 "register_operand" "=x")
1957 (and:DF (match_operand:DF 1 "register_operand" "0")
1958 (match_operand:DF 2 "register_operand" "x")))]
1959 "TARGET_SSE2"
1960 "andpd\t{%2, %0|%0, %2}"
1961 [(set_attr "type" "sselog")
1962 (set_attr "mode" "V2DF")])
1963
;; Scalar DF and-not, (~operand 1) & operand 2, implemented with the packed
;; andnpd instruction.  Register operands only.
1964 (define_insn "*nanddf3"
1965 [(set (match_operand:DF 0 "register_operand" "=x")
1966 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1967 (match_operand:DF 2 "register_operand" "x")))]
1968 "TARGET_SSE2"
1969 "andnpd\t{%2, %0|%0, %2}"
1970 [(set_attr "type" "sselog")
1971 (set_attr "mode" "V2DF")])
1972
;; Scalar DF bitwise inclusive OR implemented with the packed orpd
;; instruction.  Register operands only.
1973 (define_insn "*iordf3"
1974 [(set (match_operand:DF 0 "register_operand" "=x")
1975 (ior:DF (match_operand:DF 1 "register_operand" "0")
1976 (match_operand:DF 2 "register_operand" "x")))]
1977 "TARGET_SSE2"
1978 "orpd\t{%2, %0|%0, %2}"
1979 [(set_attr "type" "sselog")
1980 (set_attr "mode" "V2DF")])
1981
;; Scalar DF bitwise exclusive OR implemented with the packed xorpd
;; instruction.  Register operands only.
1982 (define_insn "*xordf3"
1983 [(set (match_operand:DF 0 "register_operand" "=x")
1984 (xor:DF (match_operand:DF 1 "register_operand" "0")
1985 (match_operand:DF 2 "register_operand" "x")))]
1986 "TARGET_SSE2"
1987 "xorpd\t{%2, %0|%0, %2}"
1988 [(set_attr "type" "sselog")
1989 (set_attr "mode" "V2DF")])
1990
1991 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1992 ;;
1993 ;; Parallel double-precision floating point conversion operations
1994 ;;
1995 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1996
;; Convert two signed 32-bit integers (V2SI) to V2DF with cvtpi2pd.  The
;; source is either an MMX register ("y", unit "mmx") or memory.
1997 (define_insn "sse2_cvtpi2pd"
1998 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1999 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2000 "TARGET_SSE2"
2001 "cvtpi2pd\t{%1, %0|%0, %1}"
2002 [(set_attr "type" "ssecvt")
2003 (set_attr "unit" "mmx,*")
2004 (set_attr "mode" "V2DF")])
2005
;; Convert V2DF to V2SI in an MMX register with cvtpd2pi.  The conversion is
;; modelled as UNSPEC_FIX_NOTRUNC rather than as an RTL "fix".
2006 (define_insn "sse2_cvtpd2pi"
2007 [(set (match_operand:V2SI 0 "register_operand" "=y")
2008 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2009 UNSPEC_FIX_NOTRUNC))]
2010 "TARGET_SSE2"
2011 "cvtpd2pi\t{%1, %0|%0, %1}"
2012 [(set_attr "type" "ssecvt")
2013 (set_attr "unit" "mmx")
2014 (set_attr "prefix_data16" "1")
2015 (set_attr "mode" "DI")])
2016
;; Truncating conversion of V2DF to V2SI in an MMX register (RTL "fix"),
;; emitted as cvttpd2pi.
2017 (define_insn "sse2_cvttpd2pi"
2018 [(set (match_operand:V2SI 0 "register_operand" "=y")
2019 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2020 "TARGET_SSE2"
2021 "cvttpd2pi\t{%1, %0|%0, %1}"
2022 [(set_attr "type" "ssecvt")
2023 (set_attr "unit" "mmx")
2024 (set_attr "prefix_data16" "1")
2025 (set_attr "mode" "TI")])
2026
;; Convert SImode operand 2 (general register or memory) to DF and merge it
;; into element 0 of V2DF operand 1, which is tied to the destination; the
;; other element is preserved.  Emits cvtsi2sd.
2027 (define_insn "sse2_cvtsi2sd"
2028 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2029 (vec_merge:V2DF
2030 (vec_duplicate:V2DF
2031 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2032 (match_operand:V2DF 1 "register_operand" "0,0")
2033 (const_int 1)))]
2034 "TARGET_SSE2"
2035 "cvtsi2sd\t{%2, %0|%0, %2}"
2036 [(set_attr "type" "sseicvt")
2037 (set_attr "mode" "DF")
2038 (set_attr "athlon_decode" "double,direct")
2039 (set_attr "amdfam10_decode" "vector,double")])
2040
;; 64-bit variant of sse2_cvtsi2sd: the integer source is DImode and the
;; pattern is only enabled for TARGET_64BIT.  Emits cvtsi2sdq.
2041 (define_insn "sse2_cvtsi2sdq"
2042 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2043 (vec_merge:V2DF
2044 (vec_duplicate:V2DF
2045 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2046 (match_operand:V2DF 1 "register_operand" "0,0")
2047 (const_int 1)))]
2048 "TARGET_SSE2 && TARGET_64BIT"
2049 "cvtsi2sdq\t{%2, %0|%0, %2}"
2050 [(set_attr "type" "sseicvt")
2051 (set_attr "mode" "DF")
2052 (set_attr "athlon_decode" "double,direct")
2053 (set_attr "amdfam10_decode" "vector,double")])
2054
;; Convert element 0 of V2DF operand 1 to SImode with cvtsd2si.  The
;; conversion is modelled as UNSPEC_FIX_NOTRUNC rather than as "fix".
2055 (define_insn "sse2_cvtsd2si"
2056 [(set (match_operand:SI 0 "register_operand" "=r,r")
2057 (unspec:SI
2058 [(vec_select:DF
2059 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2060 (parallel [(const_int 0)]))]
2061 UNSPEC_FIX_NOTRUNC))]
2062 "TARGET_SSE2"
2063 "cvtsd2si\t{%1, %0|%0, %1}"
2064 [(set_attr "type" "sseicvt")
2065 (set_attr "athlon_decode" "double,vector")
2066 (set_attr "prefix_rep" "1")
2067 (set_attr "mode" "SI")])
2068
;; Variant of sse2_cvtsd2si whose source is a plain DF operand instead of
;; element 0 of a V2DF vector.  Same cvtsd2si instruction.
2069 (define_insn "sse2_cvtsd2si_2"
2070 [(set (match_operand:SI 0 "register_operand" "=r,r")
2071 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2072 UNSPEC_FIX_NOTRUNC))]
2073 "TARGET_SSE2"
2074 "cvtsd2si\t{%1, %0|%0, %1}"
2075 [(set_attr "type" "sseicvt")
2076 (set_attr "athlon_decode" "double,vector")
2077 (set_attr "amdfam10_decode" "double,double")
2078 (set_attr "prefix_rep" "1")
2079 (set_attr "mode" "SI")])
2080
;; 64-bit variant of sse2_cvtsd2si: DImode result, enabled only for
;; TARGET_64BIT.  Emits cvtsd2siq.
2081 (define_insn "sse2_cvtsd2siq"
2082 [(set (match_operand:DI 0 "register_operand" "=r,r")
2083 (unspec:DI
2084 [(vec_select:DF
2085 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2086 (parallel [(const_int 0)]))]
2087 UNSPEC_FIX_NOTRUNC))]
2088 "TARGET_SSE2 && TARGET_64BIT"
2089 "cvtsd2siq\t{%1, %0|%0, %1}"
2090 [(set_attr "type" "sseicvt")
2091 (set_attr "athlon_decode" "double,vector")
2092 (set_attr "prefix_rep" "1")
2093 (set_attr "mode" "DI")])
2094
;; 64-bit variant of sse2_cvtsd2si_2: plain DF source, DImode result,
;; enabled only for TARGET_64BIT.
2095 (define_insn "sse2_cvtsd2siq_2"
2096 [(set (match_operand:DI 0 "register_operand" "=r,r")
2097 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2098 UNSPEC_FIX_NOTRUNC))]
2099 "TARGET_SSE2 && TARGET_64BIT"
2100 "cvtsd2siq\t{%1, %0|%0, %1}"
2101 [(set_attr "type" "sseicvt")
2102 (set_attr "athlon_decode" "double,vector")
2103 (set_attr "amdfam10_decode" "double,double")
2104 (set_attr "prefix_rep" "1")
2105 (set_attr "mode" "DI")])
2106
;; Truncating conversion (RTL "fix") of element 0 of V2DF operand 1 to
;; SImode, emitted as cvttsd2si.
2107 (define_insn "sse2_cvttsd2si"
2108 [(set (match_operand:SI 0 "register_operand" "=r,r")
2109 (fix:SI
2110 (vec_select:DF
2111 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2112 (parallel [(const_int 0)]))))]
2113 "TARGET_SSE2"
2114 "cvttsd2si\t{%1, %0|%0, %1}"
2115 [(set_attr "type" "sseicvt")
2116 (set_attr "prefix_rep" "1")
2117 (set_attr "mode" "SI")
2118 (set_attr "athlon_decode" "double,vector")
2119 (set_attr "amdfam10_decode" "double,double")])
2120
;; 64-bit variant of sse2_cvttsd2si: DImode result, enabled only for
;; TARGET_64BIT.  Emits cvttsd2siq.
2121 (define_insn "sse2_cvttsd2siq"
2122 [(set (match_operand:DI 0 "register_operand" "=r,r")
2123 (fix:DI
2124 (vec_select:DF
2125 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2126 (parallel [(const_int 0)]))))]
2127 "TARGET_SSE2 && TARGET_64BIT"
2128 "cvttsd2siq\t{%1, %0|%0, %1}"
2129 [(set_attr "type" "sseicvt")
2130 (set_attr "prefix_rep" "1")
2131 (set_attr "mode" "DI")
2132 (set_attr "athlon_decode" "double,vector")
2133 (set_attr "amdfam10_decode" "double,double")])
2134
;; Convert elements 0 and 1 of V4SI operand 1 to V2DF with cvtdq2pd.
2135 (define_insn "sse2_cvtdq2pd"
2136 [(set (match_operand:V2DF 0 "register_operand" "=x")
2137 (float:V2DF
2138 (vec_select:V2SI
2139 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2140 (parallel [(const_int 0) (const_int 1)]))))]
2141 "TARGET_SSE2"
2142 "cvtdq2pd\t{%1, %0|%0, %1}"
2143 [(set_attr "type" "ssecvt")
2144 (set_attr "mode" "V2DF")])
2145
;; Expander for the V2DF -> V4SI conversion (UNSPEC_FIX_NOTRUNC).  The two
;; converted integers form the low half of the result; operand 2 is filled
;; in here as a V2SI zero vector for the high half.
2146 (define_expand "sse2_cvtpd2dq"
2147 [(set (match_operand:V4SI 0 "register_operand" "")
2148 (vec_concat:V4SI
2149 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2150 UNSPEC_FIX_NOTRUNC)
2151 (match_dup 2)))]
2152 "TARGET_SSE2"
2153 "operands[2] = CONST0_RTX (V2SImode);")
2154
;; Insn matched by the sse2_cvtpd2dq expander: the high half of the V4SI
;; result must be a constant zero vector (const0_operand).  Emits cvtpd2dq.
2155 (define_insn "*sse2_cvtpd2dq"
2156 [(set (match_operand:V4SI 0 "register_operand" "=x")
2157 (vec_concat:V4SI
2158 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2159 UNSPEC_FIX_NOTRUNC)
2160 (match_operand:V2SI 2 "const0_operand" "")))]
2161 "TARGET_SSE2"
2162 "cvtpd2dq\t{%1, %0|%0, %1}"
2163 [(set_attr "type" "ssecvt")
2164 (set_attr "prefix_rep" "1")
2165 (set_attr "mode" "TI")
2166 (set_attr "amdfam10_decode" "double")])
2167
;; Expander for the truncating V2DF -> V4SI conversion (RTL "fix").  Operand 2
;; is filled in here as a V2SI zero vector for the high half of the result.
2168 (define_expand "sse2_cvttpd2dq"
2169 [(set (match_operand:V4SI 0 "register_operand" "")
2170 (vec_concat:V4SI
2171 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2172 (match_dup 2)))]
2173 "TARGET_SSE2"
2174 "operands[2] = CONST0_RTX (V2SImode);")
2175
;; Insn matched by the sse2_cvttpd2dq expander: truncating conversion in the
;; low half, constant zero in the high half.  Emits cvttpd2dq.
2176 (define_insn "*sse2_cvttpd2dq"
2177 [(set (match_operand:V4SI 0 "register_operand" "=x")
2178 (vec_concat:V4SI
2179 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2180 (match_operand:V2SI 2 "const0_operand" "")))]
2181 "TARGET_SSE2"
2182 "cvttpd2dq\t{%1, %0|%0, %1}"
2183 [(set_attr "type" "ssecvt")
2184 (set_attr "prefix_rep" "1")
2185 (set_attr "mode" "TI")
2186 (set_attr "amdfam10_decode" "double")])
2187
;; cvtsd2ss: narrow double-precision operand 2 and merge the result into
;; element 0 of V4SF operand 1 (tied to the destination); vec_merge mask 1
;; leaves the other elements of operand 1 unchanged.
2188 (define_insn "sse2_cvtsd2ss"
2189 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2190 (vec_merge:V4SF
2191 (vec_duplicate:V4SF
2192 (float_truncate:V2SF
2193 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2194 (match_operand:V4SF 1 "register_operand" "0,0")
2195 (const_int 1)))]
2196 "TARGET_SSE2"
2197 "cvtsd2ss\t{%2, %0|%0, %2}"
2198 [(set_attr "type" "ssecvt")
2199 (set_attr "athlon_decode" "vector,double")
2200 (set_attr "amdfam10_decode" "vector,double")
2201 (set_attr "mode" "SF")])
2202
;; cvtss2sd: widen the low single-precision elements of V4SF operand 2 and
;; merge the result into element 0 of V2DF operand 1 (tied to the
;; destination); vec_merge mask 1 leaves element 1 of operand 1 unchanged.
2203 (define_insn "sse2_cvtss2sd"
2204 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2205 (vec_merge:V2DF
2206 (float_extend:V2DF
2207 (vec_select:V2SF
2208 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2209 (parallel [(const_int 0) (const_int 1)])))
2210 (match_operand:V2DF 1 "register_operand" "0,0")
2211 (const_int 1)))]
2212 "TARGET_SSE2"
2213 "cvtss2sd\t{%2, %0|%0, %2}"
2214 [(set_attr "type" "ssecvt")
2215 (set_attr "amdfam10_decode" "vector,double")
2216 (set_attr "mode" "DF")])
2217
;; Expander for the V2DF -> V4SF narrowing conversion.  The two truncated
;; floats form the low half of the result; operand 2 is filled in here as a
;; V2SF zero vector for the high half.
2218 (define_expand "sse2_cvtpd2ps"
2219 [(set (match_operand:V4SF 0 "register_operand" "")
2220 (vec_concat:V4SF
2221 (float_truncate:V2SF
2222 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2223 (match_dup 2)))]
2224 "TARGET_SSE2"
2225 "operands[2] = CONST0_RTX (V2SFmode);")
2226
;; Insn matched by the sse2_cvtpd2ps expander: float_truncate in the low
;; half, constant zero in the high half.  Emits cvtpd2ps.
2227 (define_insn "*sse2_cvtpd2ps"
2228 [(set (match_operand:V4SF 0 "register_operand" "=x")
2229 (vec_concat:V4SF
2230 (float_truncate:V2SF
2231 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2232 (match_operand:V2SF 2 "const0_operand" "")))]
2233 "TARGET_SSE2"
2234 "cvtpd2ps\t{%1, %0|%0, %1}"
2235 [(set_attr "type" "ssecvt")
2236 (set_attr "prefix_data16" "1")
2237 (set_attr "mode" "V4SF")
2238 (set_attr "amdfam10_decode" "double")])
2239
;; Widen elements 0 and 1 of V4SF operand 1 to V2DF with cvtps2pd.
2240 (define_insn "sse2_cvtps2pd"
2241 [(set (match_operand:V2DF 0 "register_operand" "=x")
2242 (float_extend:V2DF
2243 (vec_select:V2SF
2244 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2245 (parallel [(const_int 0) (const_int 1)]))))]
2246 "TARGET_SSE2"
2247 "cvtps2pd\t{%1, %0|%0, %1}"
2248 [(set_attr "type" "ssecvt")
2249 (set_attr "mode" "V2DF")
2250 (set_attr "amdfam10_decode" "direct")])
2251
;; Widen the high two floats of V4SF operand 1 to V2DF.  Two sets are
;; emitted: the first writes a fresh V4SF temporary (operand 2) by selecting
;; elements 6,7,2,3 of concat (temp, operand 1), which places elements 2 and 3
;; of operand 1 in positions 0 and 1; the second float_extends the low two
;; elements of that temporary into operand 0.
2252 (define_expand "vec_unpacks_hi_v4sf"
2253 [(set (match_dup 2)
2254 (vec_select:V4SF
2255 (vec_concat:V8SF
2256 (match_dup 2)
2257 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2258 (parallel [(const_int 6)
2259 (const_int 7)
2260 (const_int 2)
2261 (const_int 3)])))
2262 (set (match_operand:V2DF 0 "register_operand" "")
2263 (float_extend:V2DF
2264 (vec_select:V2SF
2265 (match_dup 2)
2266 (parallel [(const_int 0) (const_int 1)]))))]
2267 "TARGET_SSE2"
2268 {
2269 operands[2] = gen_reg_rtx (V4SFmode);
2270 })
2271
;; Widen the low two floats (elements 0 and 1) of V4SF operand 1 to V2DF.
;; No preparation code; the RTL template is emitted as is.
2272 (define_expand "vec_unpacks_lo_v4sf"
2273 [(set (match_operand:V2DF 0 "register_operand" "")
2274 (float_extend:V2DF
2275 (vec_select:V2SF
2276 (match_operand:V4SF 1 "nonimmediate_operand" "")
2277 (parallel [(const_int 0) (const_int 1)]))))]
2278 "TARGET_SSE2")
2279
;; Signed unpack-high of V8HI operand 1 followed by conversion to float:
;; gen_vec_unpacks_hi_v8hi produces a V4SI temporary which
;; gen_sse2_cvtdq2ps then converts into V4SF operand 0.
(define_expand "vec_unpacks_float_hi_v8hi"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_unpacks_hi_v8hi (widened, operands[1]));
  emit_insn (gen_sse2_cvtdq2ps (operands[0], widened));
  DONE;
})
2291
;; Signed unpack-low of V8HI operand 1 followed by conversion to float:
;; gen_vec_unpacks_lo_v8hi produces a V4SI temporary which
;; gen_sse2_cvtdq2ps then converts into V4SF operand 0.
(define_expand "vec_unpacks_float_lo_v8hi"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_unpacks_lo_v8hi (widened, operands[1]));
  emit_insn (gen_sse2_cvtdq2ps (operands[0], widened));
  DONE;
})
2303
;; Unsigned unpack-high of V8HI operand 1 followed by conversion to float:
;; gen_vec_unpacku_hi_v8hi produces a V4SI temporary which
;; gen_sse2_cvtdq2ps then converts into V4SF operand 0.
(define_expand "vec_unpacku_float_hi_v8hi"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_unpacku_hi_v8hi (widened, operands[1]));
  emit_insn (gen_sse2_cvtdq2ps (operands[0], widened));
  DONE;
})
2315
;; Unsigned unpack-low of V8HI operand 1 followed by conversion to float:
;; gen_vec_unpacku_lo_v8hi produces a V4SI temporary which
;; gen_sse2_cvtdq2ps then converts into V4SF operand 0.
(define_expand "vec_unpacku_float_lo_v8hi"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_unpacku_lo_v8hi (widened, operands[1]));
  emit_insn (gen_sse2_cvtdq2ps (operands[0], widened));
  DONE;
})
2327
;; Convert the high two integers of V4SI operand 1 to V2DF.  The first set
;; copies elements 2,3,2,3 of operand 1 into a fresh V4SI temporary
;; (operand 2); the second converts elements 0 and 1 of that temporary to
;; double precision.
2328 (define_expand "vec_unpacks_float_hi_v4si"
2329 [(set (match_dup 2)
2330 (vec_select:V4SI
2331 (match_operand:V4SI 1 "nonimmediate_operand" "")
2332 (parallel [(const_int 2)
2333 (const_int 3)
2334 (const_int 2)
2335 (const_int 3)])))
2336 (set (match_operand:V2DF 0 "register_operand" "")
2337 (float:V2DF
2338 (vec_select:V2SI
2339 (match_dup 2)
2340 (parallel [(const_int 0) (const_int 1)]))))]
2341 "TARGET_SSE2"
2342 {
2343 operands[2] = gen_reg_rtx (V4SImode);
2344 })
2345
;; Convert the low two integers (elements 0 and 1) of V4SI operand 1 to V2DF.
;; No preparation code; the RTL template is emitted as is.
2346 (define_expand "vec_unpacks_float_lo_v4si"
2347 [(set (match_operand:V2DF 0 "register_operand" "")
2348 (float:V2DF
2349 (vec_select:V2SI
2350 (match_operand:V4SI 1 "nonimmediate_operand" "")
2351 (parallel [(const_int 0) (const_int 1)]))))]
2352 "TARGET_SSE2")
2353
;; Pack two V2DF vectors into one V4SF.  Each input is narrowed with
;; gen_sse2_cvtpd2ps into its own V4SF temporary, and the two temporaries
;; are then combined into operand 0 with gen_sse_movlhps.
(define_expand "vec_pack_trunc_v2df"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  rtx first = gen_reg_rtx (V4SFmode);
  rtx second = gen_reg_rtx (V4SFmode);

  emit_insn (gen_sse2_cvtpd2ps (first, operands[1]));
  emit_insn (gen_sse2_cvtpd2ps (second, operands[2]));
  emit_insn (gen_sse_movlhps (operands[0], first, second));
  DONE;
})
2370
;; Pack two V2DF vectors into one V4SI using truncating conversion.  Each
;; input is converted with gen_sse2_cvttpd2dq into its own V4SI temporary;
;; the temporaries are then combined with gen_sse2_punpcklqdq, operating on
;; V2DI lowparts of the destination and of both temporaries.
(define_expand "vec_pack_sfix_trunc_v2df"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  rtx first = gen_reg_rtx (V4SImode);
  rtx second = gen_reg_rtx (V4SImode);

  emit_insn (gen_sse2_cvttpd2dq (first, operands[1]));
  emit_insn (gen_sse2_cvttpd2dq (second, operands[2]));
  emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
                                  gen_lowpart (V2DImode, first),
                                  gen_lowpart (V2DImode, second)));
  DONE;
})
2389
2390 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2391 ;;
2392 ;; Parallel double-precision floating point element swizzling
2393 ;;
2394 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2395
;; Result = { operand 1 element 1, operand 2 element 1 } (elements 1 and 3 of
;; the concatenation).  Alternatives:
;;   0: both inputs in registers, unpckhpd.
;;   1: operand 1 in offsettable memory, operand 2 tied to the destination;
;;      movlpd from the high half of operand 1 (%H1).
;;   2: destination in memory and tied to operand 2; movhpd from operand 1.
;; The insn condition rejects the case where operands 1 and 2 are both memory.
2396 (define_insn "sse2_unpckhpd"
2397 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2398 (vec_select:V2DF
2399 (vec_concat:V4DF
2400 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2401 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2402 (parallel [(const_int 1)
2403 (const_int 3)])))]
2404 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2405 "@
2406 unpckhpd\t{%2, %0|%0, %2}
2407 movlpd\t{%H1, %0|%0, %H1}
2408 movhpd\t{%1, %0|%0, %1}"
2409 [(set_attr "type" "sselog,ssemov,ssemov")
2410 (set_attr "mode" "V2DF,V1DF,V1DF")])
2411
;; Duplicate element 0 of V2DF operand 1 into both result elements
;; (elements 0 and 2 of operand 1 concatenated with itself).  Alternative 0
;; emits movddup; alternative 1 (offsettable memory destination, register
;; source) outputs "#" and must be split.
2412 (define_insn "*sse3_movddup"
2413 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2414 (vec_select:V2DF
2415 (vec_concat:V4DF
2416 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2417 (match_dup 1))
2418 (parallel [(const_int 0)
2419 (const_int 2)])))]
2420 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2421 "@
2422 movddup\t{%1, %0|%0, %1}
2423 #"
2424 [(set_attr "type" "sselog1,ssemov")
2425 (set_attr "mode" "V2DF")])
2426
;; Post-reload split for the memory-destination form of the movddup pattern:
;; the low DF part of register operand 1 is stored twice, at byte offsets 0
;; and 8 of memory operand 0.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "register_operand" "")
            (match_dup 1))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE3 && reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx elt = gen_rtx_REG (DFmode, REGNO (operands[1]));

  emit_move_insn (adjust_address (dest, DFmode, 0), elt);
  emit_move_insn (adjust_address (dest, DFmode, 8), elt);
  DONE;
})
2443
;; Result = { operand 1 element 0, operand 2 element 0 } (elements 0 and 2 of
;; the concatenation).  Operand 1 is tied to the destination in every
;; alternative:
;;   0: operand 2 in a register, unpcklpd.
;;   1: operand 2 in memory, movhpd load into the high half.
;;   2: destination in offsettable memory; movlpd stores operand 2 to the
;;      high half of the destination (%H0).
2444 (define_insn "sse2_unpcklpd"
2445 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2446 (vec_select:V2DF
2447 (vec_concat:V4DF
2448 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2449 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2450 (parallel [(const_int 0)
2451 (const_int 2)])))]
2452 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2453 "@
2454 unpcklpd\t{%2, %0|%0, %2}
2455 movhpd\t{%2, %0|%0, %2}
2456 movlpd\t{%2, %H0|%H0, %2}"
2457 [(set_attr "type" "sselog,ssemov,ssemov")
2458 (set_attr "mode" "V2DF,V1DF,V1DF")])
2459
;; Intrinsic-facing shufpd expander.  Operand 3 is the 2-bit immediate; it is
;; decoded into the two element selectors expected by sse2_shufpd_1:
;; bit 0 picks element 0 or 1 (from operand 1), bit 1 picks element 2 or 3
;; (from operand 2) of the concatenated vector.
(define_expand "sse2_shufpd"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[3]);
  rtx sel_lo = GEN_INT (imm & 1);
  rtx sel_hi = GEN_INT ((imm & 2) ? 3 : 2);

  emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
                                sel_lo, sel_hi));
  DONE;
})
2473
;; shufpd insn in canonical vec_select form.  Operand 3 selects element 0 or 1
;; and operand 4 selects element 2 or 3 of concat (operand 1, operand 2).
;; The output code re-encodes both selectors into the instruction's 2-bit
;; immediate and overwrites operands[3] with it before printing.
(define_insn "sse2_shufpd_1"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_1_operand" "")
                     (match_operand 4 "const_2_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[3]) | ((INTVAL (operands[4]) - 2) << 1);

  operands[3] = GEN_INT (imm);
  return "shufpd\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
2493
;; Extract element 1 (the high double) of V2DF operand 1 into DF operand 0.
;;   0: register source, memory destination: movhpd store.
;;   1: source tied to the destination register: unpckhpd %0, %0.
;;   2: offsettable memory source: "#", must be split.
2494 (define_insn "sse2_storehpd"
2495 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2496 (vec_select:DF
2497 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2498 (parallel [(const_int 1)])))]
2499 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2500 "@
2501 movhpd\t{%1, %0|%0, %1}
2502 unpckhpd\t%0, %0
2503 #"
2504 [(set_attr "type" "ssemov,sselog1,ssemov")
2505 (set_attr "mode" "V1DF,V2DF,DF")])
2506
;; Post-reload split: extracting element 1 of a V2DF in memory into a
;; register becomes a plain DF move from byte offset 8 of that memory.
2507 (define_split
2508 [(set (match_operand:DF 0 "register_operand" "")
2509 (vec_select:DF
2510 (match_operand:V2DF 1 "memory_operand" "")
2511 (parallel [(const_int 1)])))]
2512 "TARGET_SSE2 && reload_completed"
2513 [(set (match_dup 0) (match_dup 1))]
2514 {
2515 operands[1] = adjust_address (operands[1], DFmode, 8);
2516 })
2517
;; Extract element 0 (the low double) of V2DF operand 1 into DF operand 0.
;; Alternative 0 (register to memory) is a movlpd store; the other two
;; alternatives output "#" and must be split.
2518 (define_insn "sse2_storelpd"
2519 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2520 (vec_select:DF
2521 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2522 (parallel [(const_int 0)])))]
2523 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2524 "@
2525 movlpd\t{%1, %0|%0, %1}
2526 #
2527 #"
2528 [(set_attr "type" "ssemov")
2529 (set_attr "mode" "V1DF,DF,DF")])
2530
;; Post-reload split: extracting element 0 of a V2DF into a register becomes
;; a plain DF move.  A register source is re-expressed as a DFmode hard
;; register with the same number; any other source goes through gen_lowpart.
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE2 && reload_completed"
  [(const_int 0)]
{
  rtx src = operands[1];

  src = (REG_P (src)
         ? gen_rtx_REG (DFmode, REGNO (src))
         : gen_lowpart (DFmode, src));
  emit_move_insn (operands[0], src);
  DONE;
})
2547
;; Replace the high element of V2DF operand 1 with DF operand 2:
;; result = { operand 1 element 0, operand 2 }.  Operand 1 is tied to the
;; destination in every alternative:
;;   0: operand 2 in memory, movhpd load into the high half.
;;   1: operand 2 in an SSE register, unpcklpd.
;;   2: destination in offsettable memory; "#", split after reload into a
;;      DF store at byte offset 8.
;;
;; A former alternative (operand 1 in "x", operand 2 tied to the destination)
;; emitted "shufpd $1, %1, %0".  shufpd always takes the low result element
;; from its destination register, so that computed { %0[1], %1[0] } instead
;; of the required { %1[0], %0[0] }.  No single shufpd can implement that
;; operand arrangement, so the alternative has been removed; reload now
;; satisfies such cases through alternative 1.
(define_insn "sse2_loadhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,o")
        (vec_concat:V2DF
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
            (parallel [(const_int 0)]))
          (match_operand:DF 2 "nonimmediate_operand"     " m,x,x*fr")))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhpd\t{%2, %0|%0, %2}
   unpcklpd\t{%2, %0|%0, %2}
   #"
  [(set_attr "type" "ssemov,sselog,other")
   (set_attr "mode" "V1DF,V2DF,DF")])
2563
;; Post-reload split for the memory-destination form of sse2_loadhpd: since
;; element 0 of the memory operand is kept, only the new high element needs
;; writing, as a DF store of operand 1 at byte offset 8.
2564 (define_split
2565 [(set (match_operand:V2DF 0 "memory_operand" "")
2566 (vec_concat:V2DF
2567 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2568 (match_operand:DF 1 "register_operand" "")))]
2569 "TARGET_SSE2 && reload_completed"
2570 [(set (match_dup 0) (match_dup 1))]
2571 {
2572 operands[0] = adjust_address (operands[0], DFmode, 8);
2573 })
2574
;; Replace the low element of V2DF operand 1 with DF operand 2:
;; result = { operand 2, operand 1 element 1 }.  Alternatives:
;;   0: operand 1 is a constant accepted by "C", operand 2 in memory: movsd.
;;   1: operand 1 tied to the destination, operand 2 in memory: movlpd.
;;   2: operand 1 tied to the destination, operand 2 in a register: movsd.
;;   3: operand 2 tied to the destination, operand 1 in a register: shufpd
;;      with immediate 2 keeps the destination's low element and takes the
;;      high element from the source, so the source must be operand 1.
;;      (It used to be printed as %2, which is the destination itself and
;;      made the instruction a no-op.)
;;   4: operand 2 tied to the destination, operand 1 in offsettable memory:
;;      movhpd from the high half of operand 1 (%H1).
;;   5: destination in memory, tied to operand 1: "#", split after reload.
(define_insn "sse2_loadlpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"    "=x,x,x,x,x,m")
        (vec_concat:V2DF
          (match_operand:DF 2 "nonimmediate_operand"    " m,m,x,0,0,x*fr")
          (vec_select:DF
            (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
            (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movsd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhpd\t{%H1, %0|%0, %H1}
   #"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
2592
;; Post-reload split for the memory-destination form of sse2_loadlpd.  The
;; pattern keeps element 1 of the memory operand and replaces element 0
;; with DF operand 1, so the only store needed is a DF store at byte
;; offset 0.  (Offset 8 would overwrite the element that must be preserved
;; and leave the low element stale.)
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_concat:V2DF
          (match_operand:DF 1 "register_operand" "")
          (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[0] = adjust_address (operands[0], DFmode, 0);
})
2603
2604 ;; Not sure these two are ever used, but it doesn't hurt to have
2605 ;; them. -aoliva
;; Extract element 1 of a V2DF when only SSE1 is available (the condition
;; requires TARGET_SSE and not TARGET_SSE2), using single-precision moves:
;; movhps store, movhlps register-to-register, or movlps from the high half
;; of an offsettable memory source (%H1).
2606 (define_insn "*vec_extractv2df_1_sse"
2607 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2608 (vec_select:DF
2609 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2610 (parallel [(const_int 1)])))]
2611 "!TARGET_SSE2 && TARGET_SSE
2612 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2613 "@
2614 movhps\t{%1, %0|%0, %1}
2615 movhlps\t{%1, %0|%0, %1}
2616 movlps\t{%H1, %0|%0, %H1}"
2617 [(set_attr "type" "ssemov")
2618 (set_attr "mode" "V2SF,V4SF,V2SF")])
2619
;; Extract element 0 of a V2DF when only SSE1 is available (TARGET_SSE and
;; not TARGET_SSE2): movlps store, movaps register copy, or movlps load.
2620 (define_insn "*vec_extractv2df_0_sse"
2621 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2622 (vec_select:DF
2623 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2624 (parallel [(const_int 0)])))]
2625 "!TARGET_SSE2 && TARGET_SSE
2626 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2627 "@
2628 movlps\t{%1, %0|%0, %1}
2629 movaps\t{%1, %0|%0, %1}
2630 movlps\t{%1, %0|%0, %1}"
2631 [(set_attr "type" "ssemov")
2632 (set_attr "mode" "V2SF,V4SF,V2SF")])
2633
;; movsd semantics on whole vectors: vec_merge mask 1 takes element 0 from
;; operand 2 and element 1 from operand 1.  Alternatives:
;;   0: operand 1 tied to the destination, operand 2 in a register: movsd.
;;   1: operand 1 tied to the destination, operand 2 in memory: movlpd load.
;;   2: destination in memory (tied to operand 1): movlpd store of operand 2.
;;   3: operand 2 tied to the destination, operand 1 in a register: shufpd
;;      with immediate 2 keeps the destination's low element and takes the
;;      high element from the source, so the source must be operand 1.
;;      (It used to be printed as %2, which is the destination itself and
;;      made the instruction a no-op.)
;;   4: operand 2 tied to the destination, operand 1 in offsettable memory:
;;      movhps load from the high half of operand 1 (%H1).
;;   5: destination in offsettable memory (tied to operand 2): movhps store
;;      of operand 1 to the high half of the destination (%H0).
(define_insn "sse2_movsd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"   "=x,x,m,x,x,o")
        (vec_merge:V2DF
          (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
          (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhps\t{%H1, %0|%0, %H1}
   movhps\t{%1, %H0|%H0, %1}"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
2650
;; Broadcast DF operand 1 (register or memory) to both elements of a V2DF
;; with the SSE3 movddup instruction.
2651 (define_insn "*vec_dupv2df_sse3"
2652 [(set (match_operand:V2DF 0 "register_operand" "=x")
2653 (vec_duplicate:V2DF
2654 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2655 "TARGET_SSE3"
2656 "movddup\t{%1, %0|%0, %1}"
2657 [(set_attr "type" "sselog1")
2658 (set_attr "mode" "DF")])
2659
;; SSE2 broadcast of DF operand 1 to both elements of a V2DF.  The source is
;; tied to the destination register and duplicated in place with
;; unpcklpd %0, %0.
2660 (define_insn "*vec_dupv2df"
2661 [(set (match_operand:V2DF 0 "register_operand" "=x")
2662 (vec_duplicate:V2DF
2663 (match_operand:DF 1 "register_operand" "0")))]
2664 "TARGET_SSE2"
2665 "unpcklpd\t%0, %0"
2666 [(set_attr "type" "sselog1")
2667 (set_attr "mode" "V2DF")])
2668
;; vec_concat of a DF operand with itself, i.e. the same broadcast as
;; *vec_dupv2df_sse3 written in vec_concat form.  Emits movddup.
2669 (define_insn "*vec_concatv2df_sse3"
2670 [(set (match_operand:V2DF 0 "register_operand" "=x")
2671 (vec_concat:V2DF
2672 (match_operand:DF 1 "nonimmediate_operand" "xm")
2673 (match_dup 1)))]
2674 "TARGET_SSE3"
2675 "movddup\t{%1, %0|%0, %1}"
2676 [(set_attr "type" "sselog1")
2677 (set_attr "mode" "DF")])
2678
;; Build a V2DF from two DF operands: result = { operand 1, operand 2 }.
;; The first three alternatives use the "Yt" register constraint and
;; double-precision instructions (unpcklpd, movhpd, and movsd for a memory
;; low element with a "C" constant high element); the last two use plain
;; "x" registers and single-precision moves (movlhps, movhps).  The pattern
;; itself is enabled for TARGET_SSE.
2679 (define_insn "*vec_concatv2df"
2680 [(set (match_operand:V2DF 0 "register_operand" "=Yt,Yt,Yt,x,x")
2681 (vec_concat:V2DF
2682 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2683 (match_operand:DF 2 "vector_move_operand" " Yt,m ,C ,x,m")))]
2684 "TARGET_SSE"
2685 "@
2686 unpcklpd\t{%2, %0|%0, %2}
2687 movhpd\t{%2, %0|%0, %2}
2688 movsd\t{%1, %0|%0, %1}
2689 movlhps\t{%2, %0|%0, %2}
2690 movhps\t{%2, %0|%0, %2}"
2691 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2692 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
2693
;; Named expander: store DF operand 1 into the element of V2DF operand 0
;; indexed by constant operand 2.  Delegates to ix86_expand_vector_set,
;; passing false as its first argument.
2694 (define_expand "vec_setv2df"
2695 [(match_operand:V2DF 0 "register_operand" "")
2696 (match_operand:DF 1 "register_operand" "")
2697 (match_operand 2 "const_int_operand" "")]
2698 "TARGET_SSE"
2699 {
2700 ix86_expand_vector_set (false, operands[0], operands[1],
2701 INTVAL (operands[2]));
2702 DONE;
2703 })
2704
;; Named expander: extract the element of V2DF operand 1 indexed by constant
;; operand 2 into DF operand 0.  Delegates to ix86_expand_vector_extract,
;; passing false as its first argument.
2705 (define_expand "vec_extractv2df"
2706 [(match_operand:DF 0 "register_operand" "")
2707 (match_operand:V2DF 1 "register_operand" "")
2708 (match_operand 2 "const_int_operand" "")]
2709 "TARGET_SSE"
2710 {
2711 ix86_expand_vector_extract (false, operands[0], operands[1],
2712 INTVAL (operands[2]));
2713 DONE;
2714 })
2715
;; Named expander: initialize V2DF operand 0 from operand 1, which carries no
;; predicate or mode here.  Delegates to ix86_expand_vector_init, passing
;; false as its first argument.
2716 (define_expand "vec_initv2df"
2717 [(match_operand:V2DF 0 "register_operand" "")
2718 (match_operand 1 "" "")]
2719 "TARGET_SSE"
2720 {
2721 ix86_expand_vector_init (false, operands[0], operands[1]);
2722 DONE;
2723 })
2724
2725 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2726 ;;
2727 ;; Parallel integral arithmetic
2728 ;;
2729 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2730
;; Integer vector negate for every SSEMODEI mode, expressed as 0 - operand 1.
;; Operand 2 is created here as a zero vector forced into a register.
2731 (define_expand "neg<mode>2"
2732 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2733 (minus:SSEMODEI
2734 (match_dup 2)
2735 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2736 "TARGET_SSE2"
2737 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
2738
;; Named expander for integer vector addition in every SSEMODEI mode.
;; Operands are passed through ix86_fixup_binary_operands_no_copy before
;; the pattern is emitted.
2739 (define_expand "add<mode>3"
2740 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2741 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2742 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2743 "TARGET_SSE2"
2744 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
2745
;; Integer vector addition insn.  The mnemonic is padd plus the per-mode
;; <ssevecsize> suffix (b/w/d/q); operands 1 and 2 are commutative and the
;; condition additionally requires ix86_binary_operator_ok.
2746 (define_insn "*add<mode>3"
2747 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2748 (plus:SSEMODEI
2749 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2750 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2751 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2752 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2753 [(set_attr "type" "sseiadd")
2754 (set_attr "prefix_data16" "1")
2755 (set_attr "mode" "TI")])
2756
;; Signed saturating addition (ss_plus) for the byte and word vector modes
;; (SSEMODE12); emits paddsb / paddsw.  Commutative in operands 1 and 2.
2757 (define_insn "sse2_ssadd<mode>3"
2758 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2759 (ss_plus:SSEMODE12
2760 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2761 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2762 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2763 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2764 [(set_attr "type" "sseiadd")
2765 (set_attr "prefix_data16" "1")
2766 (set_attr "mode" "TI")])
2767
;; Unsigned saturating addition (us_plus) for the byte and word vector modes
;; (SSEMODE12); emits paddusb / paddusw.  Commutative in operands 1 and 2.
2768 (define_insn "sse2_usadd<mode>3"
2769 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2770 (us_plus:SSEMODE12
2771 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2772 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2773 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2774 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2775 [(set_attr "type" "sseiadd")
2776 (set_attr "prefix_data16" "1")
2777 (set_attr "mode" "TI")])
2778
;; Named expander for integer vector subtraction (all SSEMODEI modes).
;; Unlike the add expander, operand 1 must already be a register.
;; ix86_fixup_binary_operands_no_copy is defined elsewhere.
2779 (define_expand "sub<mode>3"
2780 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2781 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2782 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2783 "TARGET_SSE2"
2784 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
2785
;; Integer vector subtract insn: emits psub{b,w,d,q}.  Operand 1 is a
;; register tied to the destination (no "%": the operands are not
;; interchangeable); operand 2 may be an SSE register or memory.
2786 (define_insn "*sub<mode>3"
2787 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2788 (minus:SSEMODEI
2789 (match_operand:SSEMODEI 1 "register_operand" "0")
2790 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2791 "TARGET_SSE2"
2792 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2793 [(set_attr "type" "sseiadd")
2794 (set_attr "prefix_data16" "1")
2795 (set_attr "mode" "TI")])
2796
;; ss_minus on V16QI/V8HI (SSEMODE12): emits psubs{b,w}.
;; Operand 1 is a register tied to the destination.
2797 (define_insn "sse2_sssub<mode>3"
2798 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2799 (ss_minus:SSEMODE12
2800 (match_operand:SSEMODE12 1 "register_operand" "0")
2801 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2802 "TARGET_SSE2"
2803 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2804 [(set_attr "type" "sseiadd")
2805 (set_attr "prefix_data16" "1")
2806 (set_attr "mode" "TI")])
2807
;; us_minus on V16QI/V8HI (SSEMODE12): emits psubus{b,w}.
;; Operand 1 is a register tied to the destination.
2808 (define_insn "sse2_ussub<mode>3"
2809 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2810 (us_minus:SSEMODE12
2811 (match_operand:SSEMODE12 1 "register_operand" "0")
2812 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2813 "TARGET_SSE2"
2814 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2815 [(set_attr "type" "sseiadd")
2816 (set_attr "prefix_data16" "1")
2817 (set_attr "mode" "TI")])
2818
;; V16QI multiply, synthesized from word multiplies: the bytes of both
;; inputs are unpacked into the low byte of each word (high and low halves
;; separately), the two halves are multiplied with mulv8hi3, and the low
;; byte of each word product is gathered back with a sequence of
;; punpck{h,l}bw steps.  Twelve V16QI temporaries are allocated; the
;; expander emits everything itself and ends with DONE.
2819 (define_expand "mulv16qi3"
2820 [(set (match_operand:V16QI 0 "register_operand" "")
2821 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2822 (match_operand:V16QI 2 "register_operand" "")))]
2823 "TARGET_SSE2"
2824 {
2825 rtx t[12], op0;
2826 int i;
2827
2828 for (i = 0; i < 12; ++i)
2829 t[i] = gen_reg_rtx (V16QImode);
2830
2831 /* Unpack data such that we've got a source byte in each low byte of
2832 each word. We don't care what goes into the high byte of each word.
2833 Rather than trying to get zero in there, most convenient is to let
2834 it be a copy of the low byte. */
2835 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2836 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2837 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2838 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2839
2840 /* Multiply words. The end-of-line annotations here give a picture of what
2841 the output of that instruction looks like. Dot means don't care; the
2842 letters are the bytes of the result with A being the most significant. */
2843 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2844 gen_lowpart (V8HImode, t[0]),
2845 gen_lowpart (V8HImode, t[1])));
2846 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2847 gen_lowpart (V8HImode, t[2]),
2848 gen_lowpart (V8HImode, t[3])));
2849
2850 /* Extract the relevant bytes and merge them back together. */
2851 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2852 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2853 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2854 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2855 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2856 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2857
2858 op0 = operands[0];
2859 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
2860 DONE;
2861 })
2862
;; Named expander for V8HI multiplication; the matching insn is *mulv8hi3.
;; ix86_fixup_binary_operands_no_copy is defined elsewhere.
2863 (define_expand "mulv8hi3"
2864 [(set (match_operand:V8HI 0 "register_operand" "")
2865 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2866 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2867 "TARGET_SSE2"
2868 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
2869
;; V8HI multiply insn: emits pmullw.  Operand 1 is commutative and tied to
;; the destination; operand 2 may be an SSE register or memory.
2870 (define_insn "*mulv8hi3"
2871 [(set (match_operand:V8HI 0 "register_operand" "=x")
2872 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2873 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2874 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2875 "pmullw\t{%2, %0|%0, %2}"
2876 [(set_attr "type" "sseimul")
2877 (set_attr "prefix_data16" "1")
2878 (set_attr "mode" "TI")])
2879
;; Named expander for the signed high-part multiply of V8HI vectors:
;; both inputs are sign-extended to V8SI, multiplied, shifted right by 16
;; and truncated back to V8HI.  Matched by *smulv8hi3_highpart.
2880 (define_expand "smulv8hi3_highpart"
2881 [(set (match_operand:V8HI 0 "register_operand" "")
2882 (truncate:V8HI
2883 (lshiftrt:V8SI
2884 (mult:V8SI
2885 (sign_extend:V8SI
2886 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2887 (sign_extend:V8SI
2888 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2889 (const_int 16))))]
2890 "TARGET_SSE2"
2891 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
2892
;; Signed high-part V8HI multiply insn: emits pmulhw.
;; Operand 1 is commutative and tied to the destination.
2893 (define_insn "*smulv8hi3_highpart"
2894 [(set (match_operand:V8HI 0 "register_operand" "=x")
2895 (truncate:V8HI
2896 (lshiftrt:V8SI
2897 (mult:V8SI
2898 (sign_extend:V8SI
2899 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2900 (sign_extend:V8SI
2901 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2902 (const_int 16))))]
2903 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2904 "pmulhw\t{%2, %0|%0, %2}"
2905 [(set_attr "type" "sseimul")
2906 (set_attr "prefix_data16" "1")
2907 (set_attr "mode" "TI")])
2908
;; Named expander for the unsigned high-part multiply of V8HI vectors:
;; both inputs are zero-extended to V8SI, multiplied, shifted right by 16
;; and truncated back to V8HI.  Matched by *umulv8hi3_highpart.
2909 (define_expand "umulv8hi3_highpart"
2910 [(set (match_operand:V8HI 0 "register_operand" "")
2911 (truncate:V8HI
2912 (lshiftrt:V8SI
2913 (mult:V8SI
2914 (zero_extend:V8SI
2915 (match_operand:V8HI 1 "nonimmediate_operand" ""))
2916 (zero_extend:V8SI
2917 (match_operand:V8HI 2 "nonimmediate_operand" "")))
2918 (const_int 16))))]
2919 "TARGET_SSE2"
2920 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
2921
;; Unsigned high-part V8HI multiply insn: emits pmulhuw.
;; Operand 1 is commutative and tied to the destination.
2922 (define_insn "*umulv8hi3_highpart"
2923 [(set (match_operand:V8HI 0 "register_operand" "=x")
2924 (truncate:V8HI
2925 (lshiftrt:V8SI
2926 (mult:V8SI
2927 (zero_extend:V8SI
2928 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2929 (zero_extend:V8SI
2930 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2931 (const_int 16))))]
2932 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2933 "pmulhuw\t{%2, %0|%0, %2}"
2934 [(set_attr "type" "sseimul")
2935 (set_attr "prefix_data16" "1")
2936 (set_attr "mode" "TI")])
2937
;; Unsigned widening multiply (pmuludq): elements 0 and 2 of each V4SI
;; operand are selected, zero-extended to 64 bits and multiplied, giving a
;; V2DI result.  Operand 1 is commutative and tied to the destination.
2938 (define_insn "sse2_umulv2siv2di3"
2939 [(set (match_operand:V2DI 0 "register_operand" "=x")
2940 (mult:V2DI
2941 (zero_extend:V2DI
2942 (vec_select:V2SI
2943 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2944 (parallel [(const_int 0) (const_int 2)])))
2945 (zero_extend:V2DI
2946 (vec_select:V2SI
2947 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2948 (parallel [(const_int 0) (const_int 2)])))))]
2949 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2950 "pmuludq\t{%2, %0|%0, %2}"
2951 [(set_attr "type" "sseimul")
2952 (set_attr "prefix_data16" "1")
2953 (set_attr "mode" "TI")])
2954
;; Signed widening multiply (pmuldq, SSE4.1): elements 0 and 2 of each V4SI
;; operand are selected, sign-extended to 64 bits and multiplied, giving a
;; V2DI result.  Operand 1 is commutative and tied to the destination.
2955 (define_insn "sse4_1_mulv2siv2di3"
2956 [(set (match_operand:V2DI 0 "register_operand" "=x")
2957 (mult:V2DI
2958 (sign_extend:V2DI
2959 (vec_select:V2SI
2960 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2961 (parallel [(const_int 0) (const_int 2)])))
2962 (sign_extend:V2DI
2963 (vec_select:V2SI
2964 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2965 (parallel [(const_int 0) (const_int 2)])))))]
2966 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2967 "pmuldq\t{%2, %0|%0, %2}"
2968 [(set_attr "type" "sseimul")
2969 (set_attr "prefix_extra" "1")
2970 (set_attr "mode" "TI")])
2971
;; Multiply-and-add of adjacent words (pmaddwd).  The even elements
;; (0,2,4,6) of both V8HI operands are sign-extended and multiplied, the odd
;; elements (1,3,5,7) likewise, and the two V4SI product vectors are added.
;; Operand 1 is commutative and tied to the destination.
2972 (define_insn "sse2_pmaddwd"
2973 [(set (match_operand:V4SI 0 "register_operand" "=x")
2974 (plus:V4SI
2975 (mult:V4SI
2976 (sign_extend:V4SI
2977 (vec_select:V4HI
2978 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2979 (parallel [(const_int 0)
2980 (const_int 2)
2981 (const_int 4)
2982 (const_int 6)])))
2983 (sign_extend:V4SI
2984 (vec_select:V4HI
2985 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2986 (parallel [(const_int 0)
2987 (const_int 2)
2988 (const_int 4)
2989 (const_int 6)]))))
2990 (mult:V4SI
2991 (sign_extend:V4SI
2992 (vec_select:V4HI (match_dup 1)
2993 (parallel [(const_int 1)
2994 (const_int 3)
2995 (const_int 5)
2996 (const_int 7)])))
2997 (sign_extend:V4SI
2998 (vec_select:V4HI (match_dup 2)
2999 (parallel [(const_int 1)
3000 (const_int 3)
3001 (const_int 5)
3002 (const_int 7)]))))))]
3003 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
3004 "pmaddwd\t{%2, %0|%0, %2}"
3005 [(set_attr "type" "sseiadd")
3006 (set_attr "prefix_data16" "1")
3007 (set_attr "mode" "TI")])
3008
;; V4SI multiply.  With SSE4.1 the RTL template is emitted as written (it is
;; matched by *sse4_1_mulv4si3).  Otherwise the product is synthesized from
;; two sse2_umulv2siv2di3 multiplies -- one for elements 0 and 2, one for
;; elements 1 and 3 -- whose low 32-bit halves are then packed together.
(define_expand "mulv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      rtx dest = operands[0];
      rtx src1 = operands[1];
      rtx src2 = operands[2];
      rtx even_prod = gen_reg_rtx (V4SImode);
      rtx src1_down = gen_reg_rtx (V4SImode);
      rtx src2_down = gen_reg_rtx (V4SImode);
      rtx odd_prod = gen_reg_rtx (V4SImode);
      rtx even_packed = gen_reg_rtx (V4SImode);
      rtx odd_packed = gen_reg_rtx (V4SImode);
      rtx element_bits = GEN_INT (32);

      /* Products of elements 0 and 2, as two 64-bit values.  */
      emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, even_prod),
                                         src1, src2));

      /* Shift each whole input right by one 32-bit element so that
         elements 3 and 1 occupy the slots of elements 2 and 0.  (The
         original author notes this beats a shuffle, at least on K8.)  */
      emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src1_down),
                                   gen_lowpart (TImode, src1),
                                   element_bits));
      emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src2_down),
                                   gen_lowpart (TImode, src2),
                                   element_bits));

      /* Products of elements 1 and 3.  */
      emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, odd_prod),
                                         src1_down, src2_down));

      /* Bring the low half of each second product (element 2) down to
         element 1; elements 2 and 3 of the shuffled values are unused.  */
      emit_insn (gen_sse2_pshufd_1 (even_packed, even_prod,
                                    const0_rtx, const2_rtx,
                                    const0_rtx, const0_rtx));
      emit_insn (gen_sse2_pshufd_1 (odd_packed, odd_prod,
                                    const0_rtx, const2_rtx,
                                    const0_rtx, const0_rtx));

      /* Interleave even and odd results into the destination.  */
      emit_insn (gen_sse2_punpckldq (dest, even_packed, odd_packed));
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
})
3062
;; V4SI multiply insn for SSE4.1: emits pmulld.  Operand 1 is commutative
;; and tied to the destination; operand 2 may be an SSE register or memory.
3063 (define_insn "*sse4_1_mulv4si3"
3064 [(set (match_operand:V4SI 0 "register_operand" "=x")
3065 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
3066 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
3067 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
3068 "pmulld\t{%2, %0|%0, %2}"
3069 [(set_attr "type" "sseimul")
3070 (set_attr "prefix_extra" "1")
3071 (set_attr "mode" "TI")])
3072
;; V2DI multiply, synthesized from 32x32->64 unsigned multiplies
;; (sse2_umulv2siv2di3, which uses the low 32 bits of each 64-bit element):
;;   result = lo1*lo2 + ((lo1*hi2) << 32) + ((lo2*hi1) << 32)
;; where loN/hiN are the low/high 32-bit halves of each element of
;; operand N.
(define_expand "mulv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx dest = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];
  rtx lo_lo = gen_reg_rtx (V2DImode);
  rtx hi1 = gen_reg_rtx (V2DImode);
  rtx hi2 = gen_reg_rtx (V2DImode);
  rtx lo1_hi2 = gen_reg_rtx (V2DImode);
  rtx lo2_hi1 = gen_reg_rtx (V2DImode);
  rtx partial = gen_reg_rtx (V2DImode);
  rtx half_width = GEN_INT (32);

  /* lo1 * lo2.  */
  emit_insn (gen_sse2_umulv2siv2di3 (lo_lo,
                                     gen_lowpart (V4SImode, src1),
                                     gen_lowpart (V4SImode, src2)));

  /* Logical right shift by 32 moves the high half of every element into
     its low half, where the multiply can reach it.  */
  emit_insn (gen_lshrv2di3 (hi1, src1, half_width));
  emit_insn (gen_lshrv2di3 (hi2, src2, half_width));

  /* The two cross products: lo1 * hi2 and lo2 * hi1.  */
  emit_insn (gen_sse2_umulv2siv2di3 (lo1_hi2,
                                     gen_lowpart (V4SImode, src1),
                                     gen_lowpart (V4SImode, hi2)));
  emit_insn (gen_sse2_umulv2siv2di3 (lo2_hi1,
                                     gen_lowpart (V4SImode, src2),
                                     gen_lowpart (V4SImode, hi1)));

  /* Move the cross products up to their weight of 2^32.  */
  emit_insn (gen_ashlv2di3 (lo1_hi2, lo1_hi2, half_width));
  emit_insn (gen_ashlv2di3 (lo2_hi1, lo2_hi1, half_width));

  /* Sum the three partial products.  */
  emit_insn (gen_addv2di3 (partial, lo_lo, lo1_hi2));
  emit_insn (gen_addv2di3 (dest, partial, lo2_hi1));
  DONE;
})
3116
;; Signed widening multiply V8HI x V8HI -> V4SI, "hi" variant.
;; The low 16 bits of every product come from mulv8hi3 and the high 16 bits
;; from smulv8hi3_highpart; vec_interleave_highv8hi then combines the two
;; into operand 0, which is accessed as a V8HI value.
(define_expand "vec_widen_smult_hi_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo = gen_reg_rtx (V8HImode);
  rtx prod_hi = gen_reg_rtx (V8HImode);
  rtx result = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (prod_lo, operands[1], operands[2]));
  emit_insn (gen_smulv8hi3_highpart (prod_hi, operands[1], operands[2]));
  emit_insn (gen_vec_interleave_highv8hi (result, prod_lo, prod_hi));
  DONE;
})
3136
;; Signed widening multiply V8HI x V8HI -> V4SI, "lo" variant.
;; The low 16 bits of every product come from mulv8hi3 and the high 16 bits
;; from smulv8hi3_highpart; vec_interleave_lowv8hi then combines the two
;; into operand 0, which is accessed as a V8HI value.
(define_expand "vec_widen_smult_lo_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo = gen_reg_rtx (V8HImode);
  rtx prod_hi = gen_reg_rtx (V8HImode);
  rtx result = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (prod_lo, operands[1], operands[2]));
  emit_insn (gen_smulv8hi3_highpart (prod_hi, operands[1], operands[2]));
  emit_insn (gen_vec_interleave_lowv8hi (result, prod_lo, prod_hi));
  DONE;
})
3156
;; Unsigned widening multiply V8HI x V8HI -> V4SI, "hi" variant.
;; The low 16 bits of every product come from mulv8hi3 and the high 16 bits
;; from umulv8hi3_highpart; vec_interleave_highv8hi then combines the two
;; into operand 0, which is accessed as a V8HI value.
(define_expand "vec_widen_umult_hi_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo = gen_reg_rtx (V8HImode);
  rtx prod_hi = gen_reg_rtx (V8HImode);
  rtx result = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (prod_lo, operands[1], operands[2]));
  emit_insn (gen_umulv8hi3_highpart (prod_hi, operands[1], operands[2]));
  emit_insn (gen_vec_interleave_highv8hi (result, prod_lo, prod_hi));
  DONE;
})
3176
;; Unsigned widening multiply V8HI x V8HI -> V4SI, "lo" variant.
;; The low 16 bits of every product come from mulv8hi3 and the high 16 bits
;; from umulv8hi3_highpart; vec_interleave_lowv8hi then combines the two
;; into operand 0, which is accessed as a V8HI value.
(define_expand "vec_widen_umult_lo_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo = gen_reg_rtx (V8HImode);
  rtx prod_hi = gen_reg_rtx (V8HImode);
  rtx result = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (prod_lo, operands[1], operands[2]));
  emit_insn (gen_umulv8hi3_highpart (prod_hi, operands[1], operands[2]));
  emit_insn (gen_vec_interleave_lowv8hi (result, prod_lo, prod_hi));
  DONE;
})
3196
;; Signed widening multiply V4SI x V4SI -> V2DI, "hi" variant.
;;
;; The final multiply must sign-extend its 32-bit inputs.  The unsigned
;; sse2_umulv2siv2di3 (pmuludq) zero-extends them, which yields a wrong
;; upper half whenever an element is negative, so this expander uses
;; sse4_1_mulv2siv2di3 (pmuldq) and is therefore only available with
;; SSE4.1.
(define_expand "vec_widen_smult_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE4_1"
{
  rtx op1, op2, t1, t2;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);

  /* Interleave each input with itself; presumably this leaves the two
     wanted source elements in positions 0 and 2, which are the ones the
     multiply pattern selects.  */
  emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
  emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));

  /* Signed 32x32->64 multiply of elements 0 and 2.  */
  emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
  DONE;
})
3215
;; Signed widening multiply V4SI x V4SI -> V2DI, "lo" variant.
;;
;; The final multiply must sign-extend its 32-bit inputs.  The unsigned
;; sse2_umulv2siv2di3 (pmuludq) zero-extends them, which yields a wrong
;; upper half whenever an element is negative, so this expander uses
;; sse4_1_mulv2siv2di3 (pmuldq) and is therefore only available with
;; SSE4.1.
(define_expand "vec_widen_smult_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE4_1"
{
  rtx op1, op2, t1, t2;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);

  /* Interleave each input with itself; presumably this leaves the two
     wanted source elements in positions 0 and 2, which are the ones the
     multiply pattern selects.  */
  emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
  emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));

  /* Signed 32x32->64 multiply of elements 0 and 2.  */
  emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
  DONE;
})
3234
;; Unsigned widening multiply V4SI x V4SI -> V2DI, "hi" variant.
;; Each input is interleaved with itself via vec_interleave_highv4si and the
;; results are multiplied with sse2_umulv2siv2di3, which uses elements 0 and
;; 2 of its V4SI inputs.
(define_expand "vec_widen_umult_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx spread1 = gen_reg_rtx (V4SImode);
  rtx spread2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_interleave_highv4si (spread1, operands[1], operands[1]));
  emit_insn (gen_vec_interleave_highv4si (spread2, operands[2], operands[2]));
  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], spread1, spread2));
  DONE;
})
3253
;; Unsigned widening multiply V4SI x V4SI -> V2DI, "lo" variant.
;; Each input is interleaved with itself via vec_interleave_lowv4si and the
;; results are multiplied with sse2_umulv2siv2di3, which uses elements 0 and
;; 2 of its V4SI inputs.
(define_expand "vec_widen_umult_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx spread1 = gen_reg_rtx (V4SImode);
  rtx spread2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_interleave_lowv4si (spread1, operands[1], operands[1]));
  emit_insn (gen_vec_interleave_lowv4si (spread2, operands[2], operands[2]));
  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], spread1, spread2));
  DONE;
})
3272
;; Signed dot-product step: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2).  sse2_pmaddwd supplies the pairwise-summed V4SI products,
;; which are then added to the V4SI accumulator.
(define_expand "sdot_prodv8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")
   (match_operand:V4SI 3 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx pair_sums = gen_reg_rtx (V4SImode);

  emit_insn (gen_sse2_pmaddwd (pair_sums, operands[1], operands[2]));
  emit_insn (gen_addv4si3 (operands[0], operands[3], pair_sums));
  DONE;
})
3285
;; Unsigned dot-product step: operand 0 (V2DI) = operand 3 plus the 64-bit
;; products of the even elements (0, 2) plus the 64-bit products of the odd
;; elements (1, 3) of the two V4SI inputs.  The odd elements are reached by
;; shifting each whole input right by 32 bits before the second multiply.
(define_expand "udot_prodv4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")
   (match_operand:V2DI 3 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx even_acc, src1_down, src2_down, odd_prod;

  /* Even-element products, accumulated onto operand 3.  */
  even_acc = gen_reg_rtx (V2DImode);
  emit_insn (gen_sse2_umulv2siv2di3 (even_acc, operands[1], operands[2]));
  emit_insn (gen_addv2di3 (even_acc, even_acc, operands[3]));

  /* Move elements 1 and 3 into slots 0 and 2.  */
  src1_down = gen_reg_rtx (V4SImode);
  src2_down = gen_reg_rtx (V4SImode);
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src1_down),
                               gen_lowpart (TImode, operands[1]),
                               GEN_INT (32)));
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src2_down),
                               gen_lowpart (TImode, operands[2]),
                               GEN_INT (32)));

  /* Odd-element products.  */
  odd_prod = gen_reg_rtx (V2DImode);
  emit_insn (gen_sse2_umulv2siv2di3 (odd_prod, src1_down, src2_down));

  emit_insn (gen_addv2di3 (operands[0], even_acc, odd_prod));
  DONE;
})
3314
;; Arithmetic right shift of V8HI/V4SI (SSEMODE24): emits psra{w,d}.
;; Operand 1 is tied to the destination; the shift count (operand 2) is a
;; TImode nonmemory operand with constraint "xn".
3315 (define_insn "ashr<mode>3"
3316 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3317 (ashiftrt:SSEMODE24
3318 (match_operand:SSEMODE24 1 "register_operand" "0")
3319 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3320 "TARGET_SSE2"
3321 "psra<ssevecsize>\t{%2, %0|%0, %2}"
3322 [(set_attr "type" "sseishft")
3323 (set_attr "prefix_data16" "1")
3324 (set_attr "mode" "TI")])
3325
;; Logical right shift of V8HI/V4SI/V2DI (SSEMODE248): emits psrl{w,d,q}.
;; Operand 1 is tied to the destination; the shift count (operand 2) is a
;; TImode nonmemory operand with constraint "xn".
3326 (define_insn "lshr<mode>3"
3327 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3328 (lshiftrt:SSEMODE248
3329 (match_operand:SSEMODE248 1 "register_operand" "0")
3330 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3331 "TARGET_SSE2"
3332 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
3333 [(set_attr "type" "sseishft")
3334 (set_attr "prefix_data16" "1")
3335 (set_attr "mode" "TI")])
3336
;; Left shift of V8HI/V4SI/V2DI (SSEMODE248): emits psll{w,d,q}.
;; Operand 1 is tied to the destination; the shift count (operand 2) is a
;; TImode nonmemory operand with constraint "xn".
3337 (define_insn "ashl<mode>3"
3338 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
3339 (ashift:SSEMODE248
3340 (match_operand:SSEMODE248 1 "register_operand" "0")
3341 (match_operand:TI 2 "nonmemory_operand" "xn")))]
3342 "TARGET_SSE2"
3343 "psll<ssevecsize>\t{%2, %0|%0, %2}"
3344 [(set_attr "type" "sseishft")
3345 (set_attr "prefix_data16" "1")
3346 (set_attr "mode" "TI")])
3347
;; Whole-register (TImode) left shift: emits pslldq.
;; The RTL shift count (operand 2) must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing, so the instruction receives count/8 as its immediate.
3348 (define_insn "sse2_ashlti3"
3349 [(set (match_operand:TI 0 "register_operand" "=x")
3350 (ashift:TI (match_operand:TI 1 "register_operand" "0")
3351 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
3352 "TARGET_SSE2"
3353 {
3354 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
3355 return "pslldq\t{%2, %0|%0, %2}";
3356 }
3357 [(set_attr "type" "sseishft")
3358 (set_attr "prefix_data16" "1")
3359 (set_attr "mode" "TI")])
3360
;; Whole-vector left shift for any SSEMODEI mode.  The expansion FAILs
;; unless the shift count satisfies const_0_to_255_mul_8_operand; otherwise
;; destination and source are re-expressed as TImode values so that the
;; emitted TImode ashift can be matched (see sse2_ashlti3).
(define_expand "vec_shl_<mode>"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
                   (match_operand:SI 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  int opno;

  if (!const_0_to_255_mul_8_operand (operands[2], SImode))
    FAIL;

  /* View operands 0 and 1 as single 128-bit integers.  */
  for (opno = 0; opno <= 1; opno++)
    operands[opno] = gen_lowpart (TImode, operands[opno]);
})
3372
;; Whole-register (TImode) logical right shift: emits psrldq.
;; The RTL shift count (operand 2) must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing, so the instruction receives count/8 as its immediate.
3373 (define_insn "sse2_lshrti3"
3374 [(set (match_operand:TI 0 "register_operand" "=x")
3375 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
3376 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
3377 "TARGET_SSE2"
3378 {
3379 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
3380 return "psrldq\t{%2, %0|%0, %2}";
3381 }
3382 [(set_attr "type" "sseishft")
3383 (set_attr "prefix_data16" "1")
3384 (set_attr "mode" "TI")])
3385
;; Whole-vector logical right shift for any SSEMODEI mode.  The expansion
;; FAILs unless the shift count satisfies const_0_to_255_mul_8_operand;
;; otherwise destination and source are re-expressed as TImode values so
;; that the emitted TImode lshiftrt can be matched (see sse2_lshrti3).
(define_expand "vec_shr_<mode>"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
                     (match_operand:SI 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  int opno;

  if (!const_0_to_255_mul_8_operand (operands[2], SImode))
    FAIL;

  /* View operands 0 and 1 as single 128-bit integers.  */
  for (opno = 0; opno <= 1; opno++)
    operands[opno] = gen_lowpart (TImode, operands[opno]);
})
3397
;; Named expander for unsigned V16QI maximum; matched by *umaxv16qi3.
;; ix86_fixup_binary_operands_no_copy is defined elsewhere.
3398 (define_expand "umaxv16qi3"
3399 [(set (match_operand:V16QI 0 "register_operand" "")
3400 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3401 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3402 "TARGET_SSE2"
3403 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
3404
;; Unsigned V16QI maximum insn: emits pmaxub.  Operand 1 is commutative
;; and tied to the destination.
3405 (define_insn "*umaxv16qi3"
3406 [(set (match_operand:V16QI 0 "register_operand" "=x")
3407 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3408 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3409 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
3410 "pmaxub\t{%2, %0|%0, %2}"
3411 [(set_attr "type" "sseiadd")
3412 (set_attr "prefix_data16" "1")
3413 (set_attr "mode" "TI")])
3414
;; Named expander for signed V8HI maximum; matched by *smaxv8hi3.
;; ix86_fixup_binary_operands_no_copy is defined elsewhere.
3415 (define_expand "smaxv8hi3"
3416 [(set (match_operand:V8HI 0 "register_operand" "")
3417 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3418 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3419 "TARGET_SSE2"
3420 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
3421
;; Signed V8HI maximum insn: emits pmaxsw.  Operand 1 is commutative and
;; tied to the destination.
3422 (define_insn "*smaxv8hi3"
3423 [(set (match_operand:V8HI 0 "register_operand" "=x")
3424 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3425 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3426 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
3427 "pmaxsw\t{%2, %0|%0, %2}"
3428 [(set_attr "type" "sseiadd")
3429 (set_attr "prefix_data16" "1")
3430 (set_attr "mode" "TI")])
3431
;; Unsigned V8HI maximum.  With SSE4.1 the RTL template is emitted as
;; written.  Without it the result is computed as
;;   (operand 1 -us operand 2) + operand 2
;; i.e. an unsigned saturating subtract followed by an add.  A fresh
;; temporary holds the difference when the destination is the same rtx as
;; operand 2, so operand 2 is still intact for the add.
(define_expand "umaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
                   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      rtx dest = operands[0];
      rtx rhs = operands[2];
      rtx diff = rtx_equal_p (dest, rhs) ? gen_reg_rtx (V8HImode) : dest;

      emit_insn (gen_sse2_ussubv8hi3 (diff, operands[1], rhs));
      emit_insn (gen_addv8hi3 (dest, diff, rhs));
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
})
3450
;; Signed maximum for V16QI and V4SI (SSEMODE14).  With SSE4.1 the RTL
;; template is emitted as written (matched by *sse4_1_smax<mode>3).
;; Otherwise it is expanded as the conditional
;;   operand 0 = (operand 1 > operand 2) ? operand 1 : operand 2
;; through ix86_expand_int_vcond, using the same operand layout as the
;; vcond<mode> expander.
(define_expand "smax<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "")
        (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
                        (match_operand:SSEMODE14 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      rtx vcond_ops[6];
      bool expanded;

      vcond_ops[0] = operands[0];                   /* destination */
      vcond_ops[1] = operands[1];                   /* value if true */
      vcond_ops[2] = operands[2];                   /* value if false */
      vcond_ops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
      vcond_ops[4] = operands[1];                   /* comparison lhs */
      vcond_ops[5] = operands[2];                   /* comparison rhs */
      expanded = ix86_expand_int_vcond (vcond_ops);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
})
3475
;; SSE4.1 signed maximum for V16QI/V4SI (SSEMODE14): emits pmaxs{b,d}.
;; Operand 1 is commutative and tied to the destination.
3476 (define_insn "*sse4_1_smax<mode>3"
3477 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3478 (smax:SSEMODE14
3479 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3480 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3481 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
3482 "pmaxs<ssevecsize>\t{%2, %0|%0, %2}"
3483 [(set_attr "type" "sseiadd")
3484 (set_attr "prefix_extra" "1")
3485 (set_attr "mode" "TI")])
3486
;; Unsigned V4SI maximum.  With SSE4.1 the RTL template is emitted as
;; written (matched by *sse4_1_umax<mode>3).  Otherwise it is expanded as
;; the conditional
;;   operand 0 = (operand 1 >u operand 2) ? operand 1 : operand 2
;; through ix86_expand_int_vcond, using the same operand layout as the
;; vcondu<mode> expander.
(define_expand "umaxv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      rtx vcond_ops[6];
      bool expanded;

      vcond_ops[0] = operands[0];                   /* destination */
      vcond_ops[1] = operands[1];                   /* value if true */
      vcond_ops[2] = operands[2];                   /* value if false */
      vcond_ops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
      vcond_ops[4] = operands[1];                   /* comparison lhs */
      vcond_ops[5] = operands[2];                   /* comparison rhs */
      expanded = ix86_expand_int_vcond (vcond_ops);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
})
3511
;; SSE4.1 unsigned maximum for V8HI/V4SI (SSEMODE24): emits pmaxu{w,d}.
;; Operand 1 is commutative and tied to the destination.
3512 (define_insn "*sse4_1_umax<mode>3"
3513 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3514 (umax:SSEMODE24
3515 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3516 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3517 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMAX, <MODE>mode, operands)"
3518 "pmaxu<ssevecsize>\t{%2, %0|%0, %2}"
3519 [(set_attr "type" "sseiadd")
3520 (set_attr "prefix_extra" "1")
3521 (set_attr "mode" "TI")])
3522
;; Named expander for unsigned V16QI minimum; matched by *uminv16qi3.
;; ix86_fixup_binary_operands_no_copy is defined elsewhere.
3523 (define_expand "uminv16qi3"
3524 [(set (match_operand:V16QI 0 "register_operand" "")
3525 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
3526 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
3527 "TARGET_SSE2"
3528 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
3529
;; Unsigned V16QI minimum insn: emits pminub.  Operand 1 is commutative
;; and tied to the destination.
3530 (define_insn "*uminv16qi3"
3531 [(set (match_operand:V16QI 0 "register_operand" "=x")
3532 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
3533 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
3534 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
3535 "pminub\t{%2, %0|%0, %2}"
3536 [(set_attr "type" "sseiadd")
3537 (set_attr "prefix_data16" "1")
3538 (set_attr "mode" "TI")])
3539
;; Named expander for signed V8HI minimum; matched by *sminv8hi3.
;; ix86_fixup_binary_operands_no_copy is defined elsewhere.
3540 (define_expand "sminv8hi3"
3541 [(set (match_operand:V8HI 0 "register_operand" "")
3542 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
3543 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
3544 "TARGET_SSE2"
3545 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
3546
;; Signed V8HI minimum insn: emits pminsw.  Operand 1 is commutative and
;; tied to the destination.
3547 (define_insn "*sminv8hi3"
3548 [(set (match_operand:V8HI 0 "register_operand" "=x")
3549 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
3550 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
3551 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
3552 "pminsw\t{%2, %0|%0, %2}"
3553 [(set_attr "type" "sseiadd")
3554 (set_attr "prefix_data16" "1")
3555 (set_attr "mode" "TI")])
3556
;; Signed minimum for V16QI and V4SI (SSEMODE14).  With SSE4.1 the RTL
;; template is emitted as written (matched by *sse4_1_smin<mode>3).
;; Otherwise it is expanded as the conditional
;;   operand 0 = (operand 1 > operand 2) ? operand 2 : operand 1
;; through ix86_expand_int_vcond, using the same operand layout as the
;; vcond<mode> expander.
(define_expand "smin<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "")
        (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
                        (match_operand:SSEMODE14 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      rtx vcond_ops[6];
      bool expanded;

      vcond_ops[0] = operands[0];                   /* destination */
      vcond_ops[1] = operands[2];                   /* value if true */
      vcond_ops[2] = operands[1];                   /* value if false */
      vcond_ops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
      vcond_ops[4] = operands[1];                   /* comparison lhs */
      vcond_ops[5] = operands[2];                   /* comparison rhs */
      expanded = ix86_expand_int_vcond (vcond_ops);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
})
3581
;; SSE4.1 signed minimum for V16QI/V4SI (SSEMODE14): emits pmins{b,d}.
;; Operand 1 is commutative and tied to the destination.
3582 (define_insn "*sse4_1_smin<mode>3"
3583 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
3584 (smin:SSEMODE14
3585 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
3586 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
3587 "TARGET_SSE4_1 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
3588 "pmins<ssevecsize>\t{%2, %0|%0, %2}"
3589 [(set_attr "type" "sseiadd")
3590 (set_attr "prefix_extra" "1")
3591 (set_attr "mode" "TI")])
3592
;; Unsigned minimum for V8HI and V4SI (SSEMODE24).  With SSE4.1 the RTL
;; template is emitted as written (matched by *sse4_1_umin<mode>3).
;; Otherwise it is expanded as the conditional
;;   operand 0 = (operand 1 >u operand 2) ? operand 2 : operand 1
;; through ix86_expand_int_vcond, using the same operand layout as the
;; vcondu<mode> expander.
(define_expand "umin<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "")
        (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
                        (match_operand:SSEMODE24 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      rtx vcond_ops[6];
      bool expanded;

      vcond_ops[0] = operands[0];                   /* destination */
      vcond_ops[1] = operands[2];                   /* value if true */
      vcond_ops[2] = operands[1];                   /* value if false */
      vcond_ops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
      vcond_ops[4] = operands[1];                   /* comparison lhs */
      vcond_ops[5] = operands[2];                   /* comparison rhs */
      expanded = ix86_expand_int_vcond (vcond_ops);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
})
3617
;; SSE4.1 unsigned minimum for V8HI/V4SI (SSEMODE24): emits pminu{w,d}.
;; Operand 1 is commutative and tied to the destination.
3618 (define_insn "*sse4_1_umin<mode>3"
3619 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
3620 (umin:SSEMODE24
3621 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
3622 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
3623 "TARGET_SSE4_1 && ix86_binary_operator_ok (UMIN, <MODE>mode, operands)"
3624 "pminu<ssevecsize>\t{%2, %0|%0, %2}"
3625 [(set_attr "type" "sseiadd")
3626 (set_attr "prefix_extra" "1")
3627 (set_attr "mode" "TI")])
3628
3629 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3630 ;;
3631 ;; Parallel integral comparisons
3632 ;;
3633 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3634
;; Element-wise equality compare for V16QI/V8HI/V4SI (SSEMODE124): emits
;; pcmpeq{b,w,d}.  Operand 1 is commutative and tied to the destination.
3635 (define_insn "sse2_eq<mode>3"
3636 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3637 (eq:SSEMODE124
3638 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
3639 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3640 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
3641 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
3642 [(set_attr "type" "ssecmp")
3643 (set_attr "prefix_data16" "1")
3644 (set_attr "mode" "TI")])
3645
;; V2DI equality compare (SSE4.1): emits pcmpeqq.  Operand 1 is
;; commutative and tied to the destination.
3646 (define_insn "sse4_1_eqv2di3"
3647 [(set (match_operand:V2DI 0 "register_operand" "=x")
3648 (eq:V2DI
3649 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
3650 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3651 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
3652 "pcmpeqq\t{%2, %0|%0, %2}"
3653 [(set_attr "type" "ssecmp")
3654 (set_attr "prefix_extra" "1")
3655 (set_attr "mode" "TI")])
3656
;; Element-wise greater-than compare for V16QI/V8HI/V4SI (SSEMODE124):
;; emits pcmpgt{b,w,d}.  Operand 1 is a register tied to the destination;
;; there is no "%" because the operands are not interchangeable.
3657 (define_insn "sse2_gt<mode>3"
3658 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3659 (gt:SSEMODE124
3660 (match_operand:SSEMODE124 1 "register_operand" "0")
3661 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3662 "TARGET_SSE2"
3663 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3664 [(set_attr "type" "ssecmp")
3665 (set_attr "prefix_data16" "1")
3666 (set_attr "mode" "TI")])
3667
;; V2DI greater-than compare (SSE4.2): emits pcmpgtq.
;;
;; GT is not commutative, so operand 1 cannot be exchanged with operand 2;
;; it is tied to the destination and therefore uses the register_operand
;; predicate, exactly as sse2_gt<mode>3 does.  With nonimmediate_operand
;; the pattern would also accept a memory operand 1, even with both
;; inputs in memory, which the instruction cannot encode.
;;
;; pcmpgtq is a 0F 38 opcode, hence prefix_extra, matching what
;; sse4_1_eqv2di3 declares for pcmpeqq.
(define_insn "sse4_2_gtv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (gt:V2DI
          (match_operand:V2DI 1 "register_operand" "0")
          (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_2"
  "pcmpgtq\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
3677
;; Vector conditional select for every SSEMODEI mode:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; All the work is delegated to ix86_expand_int_vcond; the expansion
;; succeeds or FAILs according to its return value.
(define_expand "vcond<mode>"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (if_then_else:SSEMODEI
          (match_operator 3 ""
            [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
             (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
          (match_operand:SSEMODEI 1 "general_operand" "")
          (match_operand:SSEMODEI 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (!ix86_expand_int_vcond (operands))
    FAIL;
  DONE;
})
3693
;; Integer vector conditional select under the vcondu name (presumably
;; the unsigned-comparison variant -- confirm against the standard
;; pattern names).  Operand roles and the ix86_expand_int_vcond call are
;; identical to vcond<mode>.
(define_expand "vcondu<mode>"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (if_then_else:SSEMODEI
          (match_operator 3 ""
            [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
             (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
          (match_operand:SSEMODEI 1 "general_operand" "")
          (match_operand:SSEMODEI 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  /* Give up when the target helper cannot handle this comparison.  */
  if (!ix86_expand_int_vcond (operands))
    FAIL;
  DONE;
})
3709
3710 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3711 ;;
3712 ;; Parallel bitwise logical operations
3713 ;;
3714 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3715
;; Bitwise complement of a 16-byte integer vector, expanded as an XOR of
;; operand 1 with an all-ones constant vector that is first forced into
;; a register (operand 2).
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_dup 2)))]
  "TARGET_SSE2"
{
  const int nunits = GET_MODE_NUNITS (<MODE>mode);
  rtvec ones = rtvec_alloc (nunits);
  int elt = nunits;

  /* Every element of the constant is -1.  */
  while (elt-- > 0)
    RTVEC_ELT (ones, elt) = constm1_rtx;

  operands[2] = force_reg (<MODE>mode,
                           gen_rtx_CONST_VECTOR (<MODE>mode, ones));
})
3730
;; Expander for bitwise AND on the SSEMODEI vector modes.  The only
;; preparation is ix86_fixup_binary_operands_no_copy; the RTL template is
;; then emitted as written.
3731 (define_expand "and<mode>3"
3732 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3733 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3734 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3735 "TARGET_SSE2"
3736 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
3737
;; Bitwise AND of two integer vectors (pand).  Commutative; operand 1 is
;; tied to the destination and operand 2 may be a register or memory.
3738 (define_insn "*and<mode>3"
3739 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3740 (and:SSEMODEI
3741 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3742 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3743 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3744 "pand\t{%2, %0|%0, %2}"
3745 [(set_attr "type" "sselog")
3746 (set_attr "prefix_data16" "1")
3747 (set_attr "mode" "TI")])
3748
;; AND of the complement of operand 1 with operand 2 (pandn).  The
;; inverted input is the one tied to the destination register, so the
;; pattern is not commutative.
3749 (define_insn "sse2_nand<mode>3"
3750 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3751 (and:SSEMODEI
3752 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3753 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3754 "TARGET_SSE2"
3755 "pandn\t{%2, %0|%0, %2}"
3756 [(set_attr "type" "sselog")
3757 (set_attr "prefix_data16" "1")
3758 (set_attr "mode" "TI")])
3759
;; Expander for bitwise AND on TFmode values, enabled only for
;; TARGET_64BIT.  Operands are fixed up with
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
3760 (define_expand "andtf3"
3761 [(set (match_operand:TF 0 "register_operand" "")
3762 (and:TF (match_operand:TF 1 "nonimmediate_operand" "")
3763 (match_operand:TF 2 "nonimmediate_operand" "")))]
3764 "TARGET_64BIT"
3765 "ix86_fixup_binary_operands_no_copy (AND, TFmode, operands);")
3766
;; TFmode bitwise AND implemented with the integer pand instruction.
;; Commutative; operand 1 is tied to the destination.
3767 (define_insn "*andtf3"
3768 [(set (match_operand:TF 0 "register_operand" "=x")
3769 (and:TF
3770 (match_operand:TF 1 "nonimmediate_operand" "%0")
3771 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3772 "TARGET_64BIT && ix86_binary_operator_ok (AND, TFmode, operands)"
3773 "pand\t{%2, %0|%0, %2}"
3774 [(set_attr "type" "sselog")
3775 (set_attr "prefix_data16" "1")
3776 (set_attr "mode" "TI")])
3777
;; TFmode and-not: (~operand 1) & operand 2, emitted as pandn.  The
;; complemented operand is the register tied to the destination.
3778 (define_insn "*nandtf3"
3779 [(set (match_operand:TF 0 "register_operand" "=x")
3780 (and:TF
3781 (not:TF (match_operand:TF 1 "register_operand" "0"))
3782 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3783 "TARGET_64BIT"
3784 "pandn\t{%2, %0|%0, %2}"
3785 [(set_attr "type" "sselog")
3786 (set_attr "prefix_data16" "1")
3787 (set_attr "mode" "TI")])
3788
;; Expander for bitwise inclusive OR on the SSEMODEI vector modes.  The
;; only preparation is ix86_fixup_binary_operands_no_copy.
3789 (define_expand "ior<mode>3"
3790 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3791 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3792 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3793 "TARGET_SSE2"
3794 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
3795
;; Bitwise inclusive OR of two integer vectors (por).  Commutative;
;; operand 1 is tied to the destination.
3796 (define_insn "*ior<mode>3"
3797 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3798 (ior:SSEMODEI
3799 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3800 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3801 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3802 "por\t{%2, %0|%0, %2}"
3803 [(set_attr "type" "sselog")
3804 (set_attr "prefix_data16" "1")
3805 (set_attr "mode" "TI")])
3806
;; Expander for bitwise inclusive OR on TFmode values, enabled only for
;; TARGET_64BIT.
3807 (define_expand "iortf3"
3808 [(set (match_operand:TF 0 "register_operand" "")
3809 (ior:TF (match_operand:TF 1 "nonimmediate_operand" "")
3810 (match_operand:TF 2 "nonimmediate_operand" "")))]
3811 "TARGET_64BIT"
3812 "ix86_fixup_binary_operands_no_copy (IOR, TFmode, operands);")
3813
;; TFmode bitwise inclusive OR implemented with the integer por
;; instruction.  Commutative; operand 1 is tied to the destination.
3814 (define_insn "*iortf3"
3815 [(set (match_operand:TF 0 "register_operand" "=x")
3816 (ior:TF
3817 (match_operand:TF 1 "nonimmediate_operand" "%0")
3818 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3819 "TARGET_64BIT && ix86_binary_operator_ok (IOR, TFmode, operands)"
3820 "por\t{%2, %0|%0, %2}"
3821 [(set_attr "type" "sselog")
3822 (set_attr "prefix_data16" "1")
3823 (set_attr "mode" "TI")])
3824
;; Expander for bitwise exclusive OR on the SSEMODEI vector modes.  The
;; only preparation is ix86_fixup_binary_operands_no_copy.
3825 (define_expand "xor<mode>3"
3826 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3827 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3828 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3829 "TARGET_SSE2"
3830 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
3831
;; Bitwise exclusive OR of two integer vectors (pxor).  Commutative;
;; operand 1 is tied to the destination.
3832 (define_insn "*xor<mode>3"
3833 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3834 (xor:SSEMODEI
3835 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3836 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3837 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3838 "pxor\t{%2, %0|%0, %2}"
3839 [(set_attr "type" "sselog")
3840 (set_attr "prefix_data16" "1")
3841 (set_attr "mode" "TI")])
3842
;; Expander for bitwise exclusive OR on TFmode values, enabled only for
;; TARGET_64BIT.
3843 (define_expand "xortf3"
3844 [(set (match_operand:TF 0 "register_operand" "")
3845 (xor:TF (match_operand:TF 1 "nonimmediate_operand" "")
3846 (match_operand:TF 2 "nonimmediate_operand" "")))]
3847 "TARGET_64BIT"
3848 "ix86_fixup_binary_operands_no_copy (XOR, TFmode, operands);")
3849
;; TFmode bitwise exclusive OR implemented with the integer pxor
;; instruction.  Commutative; operand 1 is tied to the destination.
3850 (define_insn "*xortf3"
3851 [(set (match_operand:TF 0 "register_operand" "=x")
3852 (xor:TF
3853 (match_operand:TF 1 "nonimmediate_operand" "%0")
3854 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3855 "TARGET_64BIT && ix86_binary_operator_ok (XOR, TFmode, operands)"
3856 "pxor\t{%2, %0|%0, %2}"
3857 [(set_attr "type" "sselog")
3858 (set_attr "prefix_data16" "1")
3859 (set_attr "mode" "TI")])
3860
3861 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3862 ;;
3863 ;; Parallel integral element swizzling
3864 ;;
3865 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3866
3867 ;; Reduce:
3868 ;; op1 = abcdefghijklmnop
3869 ;; op2 = qrstuvwxyz012345
3870 ;; h1 = aqbrcsdteufvgwhx
3871 ;; l1 = iyjzk0l1m2n3o4p5
3872 ;; h2 = aiqybjrzcks0dlt1
3873 ;; l2 = emu2fnv3gow4hpx5
3874 ;; h3 = aeimquy2bfjnrvz3
3875 ;; l3 = cgkosw04dhlptx15
3876 ;; result = bdfhjlnprtvxz135
;; Pack two V8HI inputs into one V16QI result using byte interleaves
;; only: both inputs are viewed as V16QI, run through three rounds of
;; high/low interleaving into fresh pseudos, and a final low interleave
;; writes operand 0.  The order of the emitted insns is significant.
3877 (define_expand "vec_pack_trunc_v8hi"
3878 [(match_operand:V16QI 0 "register_operand" "")
3879 (match_operand:V8HI 1 "register_operand" "")
3880 (match_operand:V8HI 2 "register_operand" "")]
3881 "TARGET_SSE2"
3882 {
3883 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3884
3885 op1 = gen_lowpart (V16QImode, operands[1]);
3886 op2 = gen_lowpart (V16QImode, operands[2]);
3887 h1 = gen_reg_rtx (V16QImode);
3888 l1 = gen_reg_rtx (V16QImode);
3889 h2 = gen_reg_rtx (V16QImode);
3890 l2 = gen_reg_rtx (V16QImode);
3891 h3 = gen_reg_rtx (V16QImode);
3892 l3 = gen_reg_rtx (V16QImode);
3893
3894 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3895 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3896 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3897 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3898 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3899 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
3900 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
3901 DONE;
3902 })
3903
3904 ;; Reduce:
3905 ;; op1 = abcdefgh
3906 ;; op2 = ijklmnop
3907 ;; h1 = aibjckdl
3908 ;; l1 = emfngohp
3909 ;; h2 = aeimbfjn
3910 ;; l2 = cgkodhlp
3911 ;; result = bdfhjlnp
;; Pack two V4SI inputs into one V8HI result using halfword interleaves
;; only: both inputs are viewed as V8HI, run through two rounds of
;; high/low interleaving, and a final low interleave writes operand 0.
3912 (define_expand "vec_pack_trunc_v4si"
3913 [(match_operand:V8HI 0 "register_operand" "")
3914 (match_operand:V4SI 1 "register_operand" "")
3915 (match_operand:V4SI 2 "register_operand" "")]
3916 "TARGET_SSE2"
3917 {
3918 rtx op1, op2, h1, l1, h2, l2;
3919
3920 op1 = gen_lowpart (V8HImode, operands[1]);
3921 op2 = gen_lowpart (V8HImode, operands[2]);
3922 h1 = gen_reg_rtx (V8HImode);
3923 l1 = gen_reg_rtx (V8HImode);
3924 h2 = gen_reg_rtx (V8HImode);
3925 l2 = gen_reg_rtx (V8HImode);
3926
3927 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
3928 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
3929 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
3930 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
3931 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
3932 DONE;
3933 })
3934
3935 ;; Reduce:
3936 ;; op1 = abcd
3937 ;; op2 = efgh
3938 ;; h1 = aebf
3939 ;; l1 = cgdh
3940 ;; result = bdfh
;; Pack two V2DI inputs into one V4SI result using doubleword
;; interleaves only: both inputs are viewed as V4SI, interleaved high
;; and low once, and a final low interleave writes operand 0.
3941 (define_expand "vec_pack_trunc_v2di"
3942 [(match_operand:V4SI 0 "register_operand" "")
3943 (match_operand:V2DI 1 "register_operand" "")
3944 (match_operand:V2DI 2 "register_operand" "")]
3945 "TARGET_SSE2"
3946 {
3947 rtx op1, op2, h1, l1;
3948
3949 op1 = gen_lowpart (V4SImode, operands[1]);
3950 op2 = gen_lowpart (V4SImode, operands[2]);
3951 h1 = gen_reg_rtx (V4SImode);
3952 l1 = gen_reg_rtx (V4SImode);
3953
3954 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
3955 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
3956 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
3957 DONE;
3958 })
3959
;; Expander that forwards directly to the sse2_punpckhbw insn: selects
;; elements 8..15 of each V16QI input, alternating operand 1 / operand 2.
;; The body emits the insn itself and ends with DONE.
3960 (define_expand "vec_interleave_highv16qi"
3961 [(set (match_operand:V16QI 0 "register_operand" "=x")
3962 (vec_select:V16QI
3963 (vec_concat:V32QI
3964 (match_operand:V16QI 1 "register_operand" "0")
3965 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3966 (parallel [(const_int 8) (const_int 24)
3967 (const_int 9) (const_int 25)
3968 (const_int 10) (const_int 26)
3969 (const_int 11) (const_int 27)
3970 (const_int 12) (const_int 28)
3971 (const_int 13) (const_int 29)
3972 (const_int 14) (const_int 30)
3973 (const_int 15) (const_int 31)])))]
3974 "TARGET_SSE2"
3975 {
3976 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
3977 DONE;
3978 })
3979
;; Expander that forwards directly to the sse2_punpcklbw insn: selects
;; elements 0..7 of each V16QI input, alternating operand 1 / operand 2.
3980 (define_expand "vec_interleave_lowv16qi"
3981 [(set (match_operand:V16QI 0 "register_operand" "=x")
3982 (vec_select:V16QI
3983 (vec_concat:V32QI
3984 (match_operand:V16QI 1 "register_operand" "0")
3985 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3986 (parallel [(const_int 0) (const_int 16)
3987 (const_int 1) (const_int 17)
3988 (const_int 2) (const_int 18)
3989 (const_int 3) (const_int 19)
3990 (const_int 4) (const_int 20)
3991 (const_int 5) (const_int 21)
3992 (const_int 6) (const_int 22)
3993 (const_int 7) (const_int 23)])))]
3994 "TARGET_SSE2"
3995 {
3996 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
3997 DONE;
3998 })
3999
;; Expander that forwards directly to the sse2_punpckhwd insn: selects
;; elements 4..7 of each V8HI input, alternating operand 1 / operand 2.
4000 (define_expand "vec_interleave_highv8hi"
4001 [(set (match_operand:V8HI 0 "register_operand" "=x")
4002 (vec_select:V8HI
4003 (vec_concat:V16HI
4004 (match_operand:V8HI 1 "register_operand" "0")
4005 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4006 (parallel [(const_int 4) (const_int 12)
4007 (const_int 5) (const_int 13)
4008 (const_int 6) (const_int 14)
4009 (const_int 7) (const_int 15)])))]
4010 "TARGET_SSE2"
4011 {
4012 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
4013 DONE;
4014 })
4015
;; Expander that forwards directly to the sse2_punpcklwd insn: selects
;; elements 0..3 of each V8HI input, alternating operand 1 / operand 2.
4016 (define_expand "vec_interleave_lowv8hi"
4017 [(set (match_operand:V8HI 0 "register_operand" "=x")
4018 (vec_select:V8HI
4019 (vec_concat:V16HI
4020 (match_operand:V8HI 1 "register_operand" "0")
4021 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4022 (parallel [(const_int 0) (const_int 8)
4023 (const_int 1) (const_int 9)
4024 (const_int 2) (const_int 10)
4025 (const_int 3) (const_int 11)])))]
4026 "TARGET_SSE2"
4027 {
4028 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
4029 DONE;
4030 })
4031
;; Expander that forwards directly to the sse2_punpckhdq insn: selects
;; elements 2..3 of each V4SI input, alternating operand 1 / operand 2.
4032 (define_expand "vec_interleave_highv4si"
4033 [(set (match_operand:V4SI 0 "register_operand" "=x")
4034 (vec_select:V4SI
4035 (vec_concat:V8SI
4036 (match_operand:V4SI 1 "register_operand" "0")
4037 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4038 (parallel [(const_int 2) (const_int 6)
4039 (const_int 3) (const_int 7)])))]
4040 "TARGET_SSE2"
4041 {
4042 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
4043 DONE;
4044 })
4045
;; Expander that forwards directly to the sse2_punpckldq insn: selects
;; elements 0..1 of each V4SI input, alternating operand 1 / operand 2.
4046 (define_expand "vec_interleave_lowv4si"
4047 [(set (match_operand:V4SI 0 "register_operand" "=x")
4048 (vec_select:V4SI
4049 (vec_concat:V8SI
4050 (match_operand:V4SI 1 "register_operand" "0")
4051 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4052 (parallel [(const_int 0) (const_int 4)
4053 (const_int 1) (const_int 5)])))]
4054 "TARGET_SSE2"
4055 {
4056 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
4057 DONE;
4058 })
4059
;; Expander that forwards directly to the sse2_punpckhqdq insn: the
;; result is element 1 of operand 1 followed by element 1 of operand 2.
4060 (define_expand "vec_interleave_highv2di"
4061 [(set (match_operand:V2DI 0 "register_operand" "=x")
4062 (vec_select:V2DI
4063 (vec_concat:V4DI
4064 (match_operand:V2DI 1 "register_operand" "0")
4065 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4066 (parallel [(const_int 1)
4067 (const_int 3)])))]
4068 "TARGET_SSE2"
4069 {
4070 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
4071 DONE;
4072 })
4073
;; Expander that forwards directly to the sse2_punpcklqdq insn: the
;; result is element 0 of operand 1 followed by element 0 of operand 2.
4074 (define_expand "vec_interleave_lowv2di"
4075 [(set (match_operand:V2DI 0 "register_operand" "=x")
4076 (vec_select:V2DI
4077 (vec_concat:V4DI
4078 (match_operand:V2DI 1 "register_operand" "0")
4079 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4080 (parallel [(const_int 0)
4081 (const_int 2)])))]
4082 "TARGET_SSE2"
4083 {
4084 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
4085 DONE;
4086 })
4087
;; Pack two V8HI vectors into one V16QI with signed saturation
;; (ss_truncate); operand 1 supplies the first eight result elements and
;; is tied to the destination.  Emits packsswb.
4088 (define_insn "sse2_packsswb"
4089 [(set (match_operand:V16QI 0 "register_operand" "=x")
4090 (vec_concat:V16QI
4091 (ss_truncate:V8QI
4092 (match_operand:V8HI 1 "register_operand" "0"))
4093 (ss_truncate:V8QI
4094 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4095 "TARGET_SSE2"
4096 "packsswb\t{%2, %0|%0, %2}"
4097 [(set_attr "type" "sselog")
4098 (set_attr "prefix_data16" "1")
4099 (set_attr "mode" "TI")])
4100
;; Pack two V4SI vectors into one V8HI with signed saturation
;; (ss_truncate); operand 1 supplies the first four result elements and
;; is tied to the destination.  Emits packssdw.
4101 (define_insn "sse2_packssdw"
4102 [(set (match_operand:V8HI 0 "register_operand" "=x")
4103 (vec_concat:V8HI
4104 (ss_truncate:V4HI
4105 (match_operand:V4SI 1 "register_operand" "0"))
4106 (ss_truncate:V4HI
4107 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4108 "TARGET_SSE2"
4109 "packssdw\t{%2, %0|%0, %2}"
4110 [(set_attr "type" "sselog")
4111 (set_attr "prefix_data16" "1")
4112 (set_attr "mode" "TI")])
4113
;; Pack two V8HI vectors into one V16QI with unsigned saturation
;; (us_truncate); operand 1 supplies the first eight result elements and
;; is tied to the destination.  Emits packuswb.
4114 (define_insn "sse2_packuswb"
4115 [(set (match_operand:V16QI 0 "register_operand" "=x")
4116 (vec_concat:V16QI
4117 (us_truncate:V8QI
4118 (match_operand:V8HI 1 "register_operand" "0"))
4119 (us_truncate:V8QI
4120 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4121 "TARGET_SSE2"
4122 "packuswb\t{%2, %0|%0, %2}"
4123 [(set_attr "type" "sselog")
4124 (set_attr "prefix_data16" "1")
4125 (set_attr "mode" "TI")])
4126
;; Interleave elements 8..15 of two V16QI vectors (punpckhbw).  The
;; selection indexes the 32-element concatenation of operands 1 and 2,
;; so index k+16 refers to element k of operand 2.
4127 (define_insn "sse2_punpckhbw"
4128 [(set (match_operand:V16QI 0 "register_operand" "=x")
4129 (vec_select:V16QI
4130 (vec_concat:V32QI
4131 (match_operand:V16QI 1 "register_operand" "0")
4132 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4133 (parallel [(const_int 8) (const_int 24)
4134 (const_int 9) (const_int 25)
4135 (const_int 10) (const_int 26)
4136 (const_int 11) (const_int 27)
4137 (const_int 12) (const_int 28)
4138 (const_int 13) (const_int 29)
4139 (const_int 14) (const_int 30)
4140 (const_int 15) (const_int 31)])))]
4141 "TARGET_SSE2"
4142 "punpckhbw\t{%2, %0|%0, %2}"
4143 [(set_attr "type" "sselog")
4144 (set_attr "prefix_data16" "1")
4145 (set_attr "mode" "TI")])
4146
;; Interleave elements 0..7 of two V16QI vectors (punpcklbw).  Index
;; k+16 in the selection refers to element k of operand 2.
4147 (define_insn "sse2_punpcklbw"
4148 [(set (match_operand:V16QI 0 "register_operand" "=x")
4149 (vec_select:V16QI
4150 (vec_concat:V32QI
4151 (match_operand:V16QI 1 "register_operand" "0")
4152 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4153 (parallel [(const_int 0) (const_int 16)
4154 (const_int 1) (const_int 17)
4155 (const_int 2) (const_int 18)
4156 (const_int 3) (const_int 19)
4157 (const_int 4) (const_int 20)
4158 (const_int 5) (const_int 21)
4159 (const_int 6) (const_int 22)
4160 (const_int 7) (const_int 23)])))]
4161 "TARGET_SSE2"
4162 "punpcklbw\t{%2, %0|%0, %2}"
4163 [(set_attr "type" "sselog")
4164 (set_attr "prefix_data16" "1")
4165 (set_attr "mode" "TI")])
4166
;; Interleave elements 4..7 of two V8HI vectors (punpckhwd).  Index k+8
;; in the selection refers to element k of operand 2.
4167 (define_insn "sse2_punpckhwd"
4168 [(set (match_operand:V8HI 0 "register_operand" "=x")
4169 (vec_select:V8HI
4170 (vec_concat:V16HI
4171 (match_operand:V8HI 1 "register_operand" "0")
4172 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4173 (parallel [(const_int 4) (const_int 12)
4174 (const_int 5) (const_int 13)
4175 (const_int 6) (const_int 14)
4176 (const_int 7) (const_int 15)])))]
4177 "TARGET_SSE2"
4178 "punpckhwd\t{%2, %0|%0, %2}"
4179 [(set_attr "type" "sselog")
4180 (set_attr "prefix_data16" "1")
4181 (set_attr "mode" "TI")])
4182
;; Interleave elements 0..3 of two V8HI vectors (punpcklwd).  Index k+8
;; in the selection refers to element k of operand 2.
4183 (define_insn "sse2_punpcklwd"
4184 [(set (match_operand:V8HI 0 "register_operand" "=x")
4185 (vec_select:V8HI
4186 (vec_concat:V16HI
4187 (match_operand:V8HI 1 "register_operand" "0")
4188 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4189 (parallel [(const_int 0) (const_int 8)
4190 (const_int 1) (const_int 9)
4191 (const_int 2) (const_int 10)
4192 (const_int 3) (const_int 11)])))]
4193 "TARGET_SSE2"
4194 "punpcklwd\t{%2, %0|%0, %2}"
4195 [(set_attr "type" "sselog")
4196 (set_attr "prefix_data16" "1")
4197 (set_attr "mode" "TI")])
4198
;; Interleave elements 2..3 of two V4SI vectors (punpckhdq).  Index k+4
;; in the selection refers to element k of operand 2.
4199 (define_insn "sse2_punpckhdq"
4200 [(set (match_operand:V4SI 0 "register_operand" "=x")
4201 (vec_select:V4SI
4202 (vec_concat:V8SI
4203 (match_operand:V4SI 1 "register_operand" "0")
4204 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4205 (parallel [(const_int 2) (const_int 6)
4206 (const_int 3) (const_int 7)])))]
4207 "TARGET_SSE2"
4208 "punpckhdq\t{%2, %0|%0, %2}"
4209 [(set_attr "type" "sselog")
4210 (set_attr "prefix_data16" "1")
4211 (set_attr "mode" "TI")])
4212
;; Interleave elements 0..1 of two V4SI vectors (punpckldq).  Index k+4
;; in the selection refers to element k of operand 2.
4213 (define_insn "sse2_punpckldq"
4214 [(set (match_operand:V4SI 0 "register_operand" "=x")
4215 (vec_select:V4SI
4216 (vec_concat:V8SI
4217 (match_operand:V4SI 1 "register_operand" "0")
4218 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4219 (parallel [(const_int 0) (const_int 4)
4220 (const_int 1) (const_int 5)])))]
4221 "TARGET_SSE2"
4222 "punpckldq\t{%2, %0|%0, %2}"
4223 [(set_attr "type" "sselog")
4224 (set_attr "prefix_data16" "1")
4225 (set_attr "mode" "TI")])
4226
;; Combine element 1 of operand 1 with element 1 of operand 2 into a
;; V2DI result (punpckhqdq).
4227 (define_insn "sse2_punpckhqdq"
4228 [(set (match_operand:V2DI 0 "register_operand" "=x")
4229 (vec_select:V2DI
4230 (vec_concat:V4DI
4231 (match_operand:V2DI 1 "register_operand" "0")
4232 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4233 (parallel [(const_int 1)
4234 (const_int 3)])))]
4235 "TARGET_SSE2"
4236 "punpckhqdq\t{%2, %0|%0, %2}"
4237 [(set_attr "type" "sselog")
4238 (set_attr "prefix_data16" "1")
4239 (set_attr "mode" "TI")])
4240
;; Combine element 0 of operand 1 with element 0 of operand 2 into a
;; V2DI result (punpcklqdq).
4241 (define_insn "sse2_punpcklqdq"
4242 [(set (match_operand:V2DI 0 "register_operand" "=x")
4243 (vec_select:V2DI
4244 (vec_concat:V4DI
4245 (match_operand:V2DI 1 "register_operand" "0")
4246 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4247 (parallel [(const_int 0)
4248 (const_int 2)])))]
4249 "TARGET_SSE2"
4250 "punpcklqdq\t{%2, %0|%0, %2}"
4251 [(set_attr "type" "sselog")
4252 (set_attr "prefix_data16" "1")
4253 (set_attr "mode" "TI")])
4254
;; Insert a QImode value (general register or memory) into one element
;; of a V16QI vector.  Operand 3 is matched as a power-of-two vec_merge
;; mask and rewritten to its log2 (the element index) before the pinsrb
;; template is printed.
4255 (define_insn "*sse4_1_pinsrb"
4256 [(set (match_operand:V16QI 0 "register_operand" "=x")
4257 (vec_merge:V16QI
4258 (vec_duplicate:V16QI
4259 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4260 (match_operand:V16QI 1 "register_operand" "0")
4261 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4262 "TARGET_SSE4_1"
4263 {
4264 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4265 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4266 }
4267 [(set_attr "type" "sselog")
4268 (set_attr "prefix_extra" "1")
4269 (set_attr "mode" "TI")])
4270
;; Insert an HImode value (general register or memory) into one element
;; of a V8HI vector.  Operand 3 is matched as a power-of-two vec_merge
;; mask and rewritten to its log2 (the element index) before the pinsrw
;; template is printed.
4271 (define_insn "*sse2_pinsrw"
4272 [(set (match_operand:V8HI 0 "register_operand" "=x")
4273 (vec_merge:V8HI
4274 (vec_duplicate:V8HI
4275 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4276 (match_operand:V8HI 1 "register_operand" "0")
4277 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4278 "TARGET_SSE2"
4279 {
4280 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4281 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4282 }
4283 [(set_attr "type" "sselog")
4284 (set_attr "prefix_data16" "1")
4285 (set_attr "mode" "TI")])
4286
4287 ;; It must come before sse2_loadld since it is preferred.
;; Insert an SImode value (general register or memory) into one element
;; of a V4SI vector.  Operand 3 is matched as a power-of-two vec_merge
;; mask and rewritten to its log2 (the element index) before the pinsrd
;; template is printed.
4288 (define_insn "*sse4_1_pinsrd"
4289 [(set (match_operand:V4SI 0 "register_operand" "=x")
4290 (vec_merge:V4SI
4291 (vec_duplicate:V4SI
4292 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4293 (match_operand:V4SI 1 "register_operand" "0")
4294 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4295 "TARGET_SSE4_1"
4296 {
4297 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4298 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4299 }
4300 [(set_attr "type" "sselog")
4301 (set_attr "prefix_extra" "1")
4302 (set_attr "mode" "TI")])
4303
;; Insert a DImode value (general register or memory) into one element
;; of a V2DI vector.  Operand 3 is matched as a power-of-two vec_merge
;; mask and rewritten to its log2 (the element index) before the pinsrq
;; template is printed.
;;
;; pinsrq takes a 64-bit general register source and needs REX.W, so it
;; can only be encoded in 64-bit mode; the pattern is therefore gated on
;; TARGET_64BIT, matching *sse4_1_pextrq.
(define_insn "*sse4_1_pinsrq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_merge:V2DI
          (vec_duplicate:V2DI
            (match_operand:DI 2 "nonimmediate_operand" "rm"))
          (match_operand:V2DI 1 "register_operand" "0")
          (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
  "TARGET_SSE4_1 && TARGET_64BIT"
{
  /* Convert the one-hot merge mask into the immediate element index.  */
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
  return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
4319
;; Extract one QImode element (index 0..15, operand 2) from a V16QI
;; register and zero-extend it into an SImode general register (pextrb).
4320 (define_insn "*sse4_1_pextrb"
4321 [(set (match_operand:SI 0 "register_operand" "=r")
4322 (zero_extend:SI
4323 (vec_select:QI
4324 (match_operand:V16QI 1 "register_operand" "x")
4325 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4326 "TARGET_SSE4_1"
4327 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4328 [(set_attr "type" "sselog")
4329 (set_attr "prefix_extra" "1")
4330 (set_attr "mode" "TI")])
4331
;; Store one QImode element (index 0..15, operand 2) of a V16QI register
;; directly to memory with pextrb; no extension is involved.
4332 (define_insn "*sse4_1_pextrb_memory"
4333 [(set (match_operand:QI 0 "memory_operand" "=m")
4334 (vec_select:QI
4335 (match_operand:V16QI 1 "register_operand" "x")
4336 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4337 "TARGET_SSE4_1"
4338 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4339 [(set_attr "type" "sselog")
4340 (set_attr "prefix_extra" "1")
4341 (set_attr "mode" "TI")])
4342
;; Extract one HImode element (index 0..7, operand 2) from a V8HI
;; register and zero-extend it into an SImode general register (pextrw).
4343 (define_insn "*sse2_pextrw"
4344 [(set (match_operand:SI 0 "register_operand" "=r")
4345 (zero_extend:SI
4346 (vec_select:HI
4347 (match_operand:V8HI 1 "register_operand" "x")
4348 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4349 "TARGET_SSE2"
4350 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4351 [(set_attr "type" "sselog")
4352 (set_attr "prefix_data16" "1")
4353 (set_attr "mode" "TI")])
4354
;; Store one HImode element (index 0..7, operand 2) of a V8HI register
;; directly to memory with pextrw; this memory form needs TARGET_SSE4_1.
4355 (define_insn "*sse4_1_pextrw_memory"
4356 [(set (match_operand:HI 0 "memory_operand" "=m")
4357 (vec_select:HI
4358 (match_operand:V8HI 1 "register_operand" "x")
4359 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4360 "TARGET_SSE4_1"
4361 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4362 [(set_attr "type" "sselog")
4363 (set_attr "prefix_extra" "1")
4364 (set_attr "mode" "TI")])
4365
;; Extract one SImode element (index 0..3, operand 2) of a V4SI register
;; into a general register or memory (pextrd).
4366 (define_insn "*sse4_1_pextrd"
4367 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4368 (vec_select:SI
4369 (match_operand:V4SI 1 "register_operand" "x")
4370 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4371 "TARGET_SSE4_1"
4372 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4373 [(set_attr "type" "sselog")
4374 (set_attr "prefix_extra" "1")
4375 (set_attr "mode" "TI")])
4376
4377 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Extract one DImode element (index 0..1, operand 2) of a V2DI register
;; into a general register or memory (pextrq).  Restricted to 64-bit
;; targets in addition to SSE4.1.
4378 (define_insn "*sse4_1_pextrq"
4379 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4380 (vec_select:DI
4381 (match_operand:V2DI 1 "register_operand" "x")
4382 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4383 "TARGET_SSE4_1 && TARGET_64BIT"
4384 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4385 [(set_attr "type" "sselog")
4386 (set_attr "prefix_extra" "1")
4387 (set_attr "mode" "TI")])
4388
;; Expander for pshufd taking the raw 8-bit immediate as operand 2.
;; The immediate is split into four 2-bit element selectors (lowest
;; bits first), which are passed to sse2_pshufd_1 as separate operands.
(define_expand "sse2_pshufd"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V4SI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* Selector I lives in bits 2*I and 2*I+1 of the immediate.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT ((imm >> (2 * i)) & 3);

  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
                                sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
4403
;; pshufd with the shuffle expressed as a vec_select: operands 2..5 are
;; the source element indices (each 0..3) for result elements 0..3.
;; At output time they are folded back into the single 8-bit immediate,
;; which replaces operand 2 for printing.
(define_insn "sse2_pshufd_1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  int imm = 0;
  int opno;

  /* Walk from the highest selector down so that operand 2 ends up in
     the low two bits; operand 2 is read before it is overwritten.  */
  for (opno = 5; opno >= 2; opno--)
    imm = (imm << 2) | INTVAL (operands[opno]);
  operands[2] = GEN_INT (imm);

  return "pshufd\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
4426
;; Expander for pshuflw taking the raw 8-bit immediate as operand 2.
;; The immediate is split into four 2-bit element selectors (lowest
;; bits first), which are passed to sse2_pshuflw_1 as separate operands.
(define_expand "sse2_pshuflw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* Selector I lives in bits 2*I and 2*I+1 of the immediate.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT ((imm >> (2 * i)) & 3);

  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
                                 sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
4441
;; pshuflw as a vec_select: operands 2..5 are the source indices (each
;; 0..3) for result elements 0..3, while elements 4..7 are copied
;; unchanged.  At output time the four selectors are folded back into
;; the 8-bit immediate, which replaces operand 2 for printing.
(define_insn "sse2_pshuflw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")
                     (const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))]
  "TARGET_SSE2"
{
  int imm = 0;
  int opno;

  /* Walk from the highest selector down so that operand 2 ends up in
     the low two bits; operand 2 is read before it is overwritten.  */
  for (opno = 5; opno >= 2; opno--)
    imm = (imm << 2) | INTVAL (operands[opno]);
  operands[2] = GEN_INT (imm);

  return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "TI")])
4468
;; Expander for pshufhw taking the raw 8-bit immediate as operand 2.
;; Each 2-bit field of the immediate is biased by 4 to form an element
;; index in the range 4..7 before being passed to sse2_pshufhw_1.
(define_expand "sse2_pshufhw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* Selector I lives in bits 2*I and 2*I+1; add 4 to address the
     upper four elements.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT (((imm >> (2 * i)) & 3) + 4);

  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
                                 sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
4483
;; pshufhw as a vec_select: elements 0..3 are copied unchanged and
;; operands 2..5 are the source indices (each 4..7) for result elements
;; 4..7.  At output time the bias of 4 is removed and the selectors are
;; folded back into the 8-bit immediate, which replaces operand 2.
(define_insn "sse2_pshufhw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 2)
                     (const_int 3)
                     (match_operand 2 "const_4_to_7_operand" "")
                     (match_operand 3 "const_4_to_7_operand" "")
                     (match_operand 4 "const_4_to_7_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")])))]
  "TARGET_SSE2"
{
  int imm = 0;
  int opno;

  /* Walk from the highest selector down so that operand 2 ends up in
     the low two bits; operand 2 is read before it is overwritten.  */
  for (opno = 5; opno >= 2; opno--)
    imm = (imm << 2) | (INTVAL (operands[opno]) - 4);
  operands[2] = GEN_INT (imm);

  return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "TI")])
4510
;; Build a V4SI from SImode operand 1 merged (mask 1) with a zero
;; vector: the preparation statement sets operand 2 to
;; CONST0_RTX (V4SImode).  Note the condition is TARGET_SSE, not SSE2.
4511 (define_expand "sse2_loadd"
4512 [(set (match_operand:V4SI 0 "register_operand" "")
4513 (vec_merge:V4SI
4514 (vec_duplicate:V4SI
4515 (match_operand:SI 1 "nonimmediate_operand" ""))
4516 (match_dup 2)
4517 (const_int 1)))]
4518 "TARGET_SSE"
4519 "operands[2] = CONST0_RTX (V4SImode);")
4520
;; Merge SImode operand 2 (mask 1) into V4SI operand 1, which is either
;; zero (constraint C) or, in the last alternative, the destination
;; itself.  The first two alternatives emit movd (from memory or a
;; general register); the last two emit movss.
4521 (define_insn "sse2_loadld"
4522 [(set (match_operand:V4SI 0 "register_operand" "=Yt,Yi,x,x")
4523 (vec_merge:V4SI
4524 (vec_duplicate:V4SI
4525 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4526 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4527 (const_int 1)))]
4528 "TARGET_SSE"
4529 "@
4530 movd\t{%2, %0|%0, %2}
4531 movd\t{%2, %0|%0, %2}
4532 movss\t{%2, %0|%0, %2}
4533 movss\t{%2, %0|%0, %2}"
4534 [(set_attr "type" "ssemov")
4535 (set_attr "mode" "TI,TI,V4SF,SF")])
4536
;; Extract element 0 of a V4SI register into an SImode destination.
;; The insn is never output directly ("#"); after reload it is split
;; into a plain SImode move from the same hard register number, provided
;; inter-unit moves are enabled or the destination is memory or not a
;; general register.
4537 (define_insn_and_split "sse2_stored"
4538 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4539 (vec_select:SI
4540 (match_operand:V4SI 1 "register_operand" "x,Yi")
4541 (parallel [(const_int 0)])))]
4542 "TARGET_SSE"
4543 "#"
4544 "&& reload_completed
4545 && (TARGET_INTER_UNIT_MOVES
4546 || MEM_P (operands [0])
4547 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4548 [(set (match_dup 0) (match_dup 1))]
4549 {
4550 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
4551 })
4552
;; Expander for extracting element 0 of a V2DI register into a DImode
;; destination.  There is no preparation code; the template is emitted
;; unchanged.
4553 (define_expand "sse_storeq"
4554 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4555 (vec_select:DI
4556 (match_operand:V2DI 1 "register_operand" "")
4557 (parallel [(const_int 0)])))]
4558 "TARGET_SSE"
4559 "")
4560
;; 64-bit variant of the element-0 DImode extraction: also allows a
;; general-register destination (second alternative).  Output is "#",
;; so the insn must be split before final assembly output.
4561 (define_insn "*sse2_storeq_rex64"
4562 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r")
4563 (vec_select:DI
4564 (match_operand:V2DI 1 "register_operand" "x,Yi")
4565 (parallel [(const_int 0)])))]
4566 "TARGET_64BIT && TARGET_SSE"
4567 "#")
4568
;; Element-0 DImode extraction from a V2DI register to memory or an SSE
;; register.  Output is "#", so the insn must be split before final
;; assembly output.
4569 (define_insn "*sse2_storeq"
4570 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4571 (vec_select:DI
4572 (match_operand:V2DI 1 "register_operand" "x")
4573 (parallel [(const_int 0)])))]
4574 "TARGET_SSE"
4575 "#")
4576
;; Post-reload split for element-0 DImode extraction from V2DI: rewrites
;; it as a DImode move from the hard register number of operand 1.
;; Applies only when inter-unit moves are enabled or the destination is
;; memory or not a general register.
4577 (define_split
4578 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4579 (vec_select:DI
4580 (match_operand:V2DI 1 "register_operand" "")
4581 (parallel [(const_int 0)])))]
4582 "TARGET_SSE
4583 && reload_completed
4584 && (TARGET_INTER_UNIT_MOVES
4585 || MEM_P (operands [0])
4586 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4587 [(set (match_dup 0) (match_dup 1))]
4588 {
4589 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
4590 })
4591
;; Extract element 1 of a V2DI (SSE2 version).  Three alternatives:
;; store to memory with movhps; shift the destination register right by
;; 8 bytes in place with psrldq; or load with movq from an offsettable
;; memory source via %H1 (presumably the address of the second half --
;; confirm against print_operand).  Memory-to-memory is rejected.
4592 (define_insn "*vec_extractv2di_1_sse2"
4593 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4594 (vec_select:DI
4595 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4596 (parallel [(const_int 1)])))]
4597 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4598 "@
4599 movhps\t{%1, %0|%0, %1}
4600 psrldq\t{$8, %0|%0, 8}
4601 movq\t{%H1, %0|%0, %H1}"
4602 [(set_attr "type" "ssemov,sseishft,ssemov")
4603 (set_attr "memory" "*,none,*")
4604 (set_attr "mode" "V2SF,TI,TI")])
4605
4606 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; Extract element 1 of a V2DI when only SSE1 is available
;; (!TARGET_SSE2).  Alternatives: store with movhps, register-to-register
;; with movhlps, or load with movlps from an offsettable memory source
;; via %H1.  Memory-to-memory is rejected.
4607 (define_insn "*vec_extractv2di_1_sse"
4608 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4609 (vec_select:DI
4610 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4611 (parallel [(const_int 1)])))]
4612 "!TARGET_SSE2 && TARGET_SSE
4613 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4614 "@
4615 movhps\t{%1, %0|%0, %1}
4616 movhlps\t{%1, %0|%0, %1}
4617 movlps\t{%H1, %0|%0, %H1}"
4618 [(set_attr "type" "ssemov")
4619 (set_attr "mode" "V2SF,V4SF,V2SF")])
4620
;; Broadcast an SImode register into all four elements of a V4SI.
;; Alternative 0 uses pshufd with selector 0 on a separate source register;
;; alternative 1 requires the source to be tied to the destination and uses
;; shufps with selector 0 on that single register.
4621 (define_insn "*vec_dupv4si"
4622 [(set (match_operand:V4SI 0 "register_operand" "=Yt,x")
4623 (vec_duplicate:V4SI
4624 (match_operand:SI 1 "register_operand" " Yt,0")))]
4625 "TARGET_SSE"
4626 "@
4627 pshufd\t{$0, %1, %0|%0, %1, 0}
4628 shufps\t{$0, %0, %0|%0, %0, 0}"
4629 [(set_attr "type" "sselog1")
4630 (set_attr "mode" "TI,V4SF")])
4631
;; Broadcast a DImode register into both elements of a V2DI.  In both
;; alternatives the source is tied to the destination ("0"), so the
;; templates name only %0: punpcklqdq for the first alternative, movlhps
;; for the second.
4632 (define_insn "*vec_dupv2di"
4633 [(set (match_operand:V2DI 0 "register_operand" "=Yt,x")
4634 (vec_duplicate:V2DI
4635 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4636 "TARGET_SSE"
4637 "@
4638 punpcklqdq\t%0, %0
4639 movlhps\t%0, %0"
4640 [(set_attr "type" "sselog1,ssemov")
4641 (set_attr "mode" "TI,V4SF")])
4642
;; SSE2: build a V2SI from two SImode values.  Alternatives 0 and 2 take
;; operand 1 tied to the destination and interleave operand 2 into it with
;; punpckldq (SSE and MMX register classes respectively).  Alternatives 1
;; and 3 take operand 2 under the "C" constraint and just load operand 1
;; with movd.
4643 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4644 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4645 ;; alternatives pretty much forces the MMX alternative to be chosen.
4646 (define_insn "*sse2_concatv2si"
4647 [(set (match_operand:V2SI 0 "register_operand" "=Yt, Yt,*y,*y")
4648 (vec_concat:V2SI
4649 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
4650 (match_operand:SI 2 "reg_or_0_operand" " Yt,C ,*y, C")))]
4651 "TARGET_SSE2"
4652 "@
4653 punpckldq\t{%2, %0|%0, %2}
4654 movd\t{%1, %0|%0, %1}
4655 punpckldq\t{%2, %0|%0, %2}
4656 movd\t{%1, %0|%0, %1}"
4657 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4658 (set_attr "mode" "TI,TI,DI,DI")])
4659
;; SSE1 form of the V2SI concatenation.  SSE alternatives use unpcklps
;; (operand 1 tied to the destination, operand 2 in a register) or movss
;; (operand 1 from memory, operand 2 under "C"); the MMX alternatives use
;; punpckldq and movd in the same two roles.
4660 (define_insn "*sse1_concatv2si"
4661 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4662 (vec_concat:V2SI
4663 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4664 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4665 "TARGET_SSE"
4666 "@
4667 unpcklps\t{%2, %0|%0, %2}
4668 movss\t{%1, %0|%0, %1}
4669 punpckldq\t{%2, %0|%0, %2}
4670 movd\t{%1, %0|%0, %1}"
4671 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4672 (set_attr "mode" "V4SF,V4SF,DI,DI")])
4673
;; Concatenate two V2SI values into a V4SI.  Operand 1 is always tied to
;; the destination; operand 2 is merged in with punpcklqdq or movlhps when
;; it is in a register, or with movhps when it is in memory.
4674 (define_insn "*vec_concatv4si_1"
4675 [(set (match_operand:V4SI 0 "register_operand" "=Yt,x,x")
4676 (vec_concat:V4SI
4677 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4678 (match_operand:V2SI 2 "nonimmediate_operand" " Yt,x,m")))]
4679 "TARGET_SSE"
4680 "@
4681 punpcklqdq\t{%2, %0|%0, %2}
4682 movlhps\t{%2, %0|%0, %2}
4683 movhps\t{%2, %0|%0, %2}"
4684 [(set_attr "type" "sselog,ssemov,ssemov")
4685 (set_attr "mode" "TI,V4SF,V2SF")])
4686
;; Concatenate two DImode values into a V2DI.  Six alternatives:
;;   0: operand 1 from memory, operand 2 under "C"          -> movq
;;   1: operand 1 from an MMX register, operand 2 under "C" -> movq2dq
;;   2: operand 1 tied to dest, operand 2 in a register     -> punpcklqdq
;;   3: operand 1 tied to dest, operand 2 in a register     -> movlhps
;;   4: operand 1 tied to dest, operand 2 in memory         -> movhps
;;   5: operand 2 tied to dest, operand 1 in memory         -> movlps
4687 (define_insn "vec_concatv2di"
4688 [(set (match_operand:V2DI 0 "register_operand" "=Yt,?Yt,Yt,x,x,x")
4689 (vec_concat:V2DI
4690 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4691 (match_operand:DI 2 "vector_move_operand" " C, C,Yt,x,m,0")))]
4692 "TARGET_SSE"
4693 "@
4694 movq\t{%1, %0|%0, %1}
4695 movq2dq\t{%1, %0|%0, %1}
4696 punpcklqdq\t{%2, %0|%0, %2}
4697 movlhps\t{%2, %0|%0, %2}
4698 movhps\t{%2, %0|%0, %2}
4699 movlps\t{%1, %0|%0, %1}"
4700 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4701 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
4702
;; Expander for setting one element of a V2DI.  Operand 0 is the vector,
;; operand 1 the DImode value, operand 2 a constant element index.  All the
;; work is delegated to ix86_expand_vector_set (first argument false); the
;; RTL template itself is never emitted because the body ends in DONE.
4703 (define_expand "vec_setv2di"
4704 [(match_operand:V2DI 0 "register_operand" "")
4705 (match_operand:DI 1 "register_operand" "")
4706 (match_operand 2 "const_int_operand" "")]
4707 "TARGET_SSE"
4708 {
4709 ix86_expand_vector_set (false, operands[0], operands[1],
4710 INTVAL (operands[2]));
4711 DONE;
4712 })
4713
;; Expander for extracting one element of a V2DI.  Operand 0 is the DImode
;; result, operand 1 the vector, operand 2 a constant element index.
;; Delegates to ix86_expand_vector_extract (first argument false).
4714 (define_expand "vec_extractv2di"
4715 [(match_operand:DI 0 "register_operand" "")
4716 (match_operand:V2DI 1 "register_operand" "")
4717 (match_operand 2 "const_int_operand" "")]
4718 "TARGET_SSE"
4719 {
4720 ix86_expand_vector_extract (false, operands[0], operands[1],
4721 INTVAL (operands[2]));
4722 DONE;
4723 })
4724
;; Expander for initializing a V2DI register (operand 0) from operand 1,
;; which has no mode or predicate here.  Delegates to
;; ix86_expand_vector_init (first argument false).
4725 (define_expand "vec_initv2di"
4726 [(match_operand:V2DI 0 "register_operand" "")
4727 (match_operand 1 "" "")]
4728 "TARGET_SSE"
4729 {
4730 ix86_expand_vector_init (false, operands[0], operands[1]);
4731 DONE;
4732 })
4733
;; Expander for setting one element of a V4SI.  Operand 0 is the vector,
;; operand 1 the SImode value, operand 2 a constant element index.
;; Delegates to ix86_expand_vector_set (first argument false).
4734 (define_expand "vec_setv4si"
4735 [(match_operand:V4SI 0 "register_operand" "")
4736 (match_operand:SI 1 "register_operand" "")
4737 (match_operand 2 "const_int_operand" "")]
4738 "TARGET_SSE"
4739 {
4740 ix86_expand_vector_set (false, operands[0], operands[1],
4741 INTVAL (operands[2]));
4742 DONE;
4743 })
4744
;; Expander for extracting one element of a V4SI.  Operand 0 is the SImode
;; result, operand 1 the vector, operand 2 a constant element index.
;; Delegates to ix86_expand_vector_extract (first argument false).
4745 (define_expand "vec_extractv4si"
4746 [(match_operand:SI 0 "register_operand" "")
4747 (match_operand:V4SI 1 "register_operand" "")
4748 (match_operand 2 "const_int_operand" "")]
4749 "TARGET_SSE"
4750 {
4751 ix86_expand_vector_extract (false, operands[0], operands[1],
4752 INTVAL (operands[2]));
4753 DONE;
4754 })
4755
;; Expander for initializing a V4SI register (operand 0) from operand 1.
;; Delegates to ix86_expand_vector_init (first argument false).
4756 (define_expand "vec_initv4si"
4757 [(match_operand:V4SI 0 "register_operand" "")
4758 (match_operand 1 "" "")]
4759 "TARGET_SSE"
4760 {
4761 ix86_expand_vector_init (false, operands[0], operands[1]);
4762 DONE;
4763 })
4764
;; Expander for setting one element of a V8HI.  Operand 0 is the vector,
;; operand 1 the HImode value, operand 2 a constant element index.
;; Delegates to ix86_expand_vector_set (first argument false).
4765 (define_expand "vec_setv8hi"
4766 [(match_operand:V8HI 0 "register_operand" "")
4767 (match_operand:HI 1 "register_operand" "")
4768 (match_operand 2 "const_int_operand" "")]
4769 "TARGET_SSE"
4770 {
4771 ix86_expand_vector_set (false, operands[0], operands[1],
4772 INTVAL (operands[2]));
4773 DONE;
4774 })
4775
;; Expander for extracting one element of a V8HI.  Operand 0 is the HImode
;; result, operand 1 the vector, operand 2 a constant element index.
;; Delegates to ix86_expand_vector_extract (first argument false).
4776 (define_expand "vec_extractv8hi"
4777 [(match_operand:HI 0 "register_operand" "")
4778 (match_operand:V8HI 1 "register_operand" "")
4779 (match_operand 2 "const_int_operand" "")]
4780 "TARGET_SSE"
4781 {
4782 ix86_expand_vector_extract (false, operands[0], operands[1],
4783 INTVAL (operands[2]));
4784 DONE;
4785 })
4786
;; Expander for initializing a V8HI register (operand 0) from operand 1.
;; Delegates to ix86_expand_vector_init (first argument false).
4787 (define_expand "vec_initv8hi"
4788 [(match_operand:V8HI 0 "register_operand" "")
4789 (match_operand 1 "" "")]
4790 "TARGET_SSE"
4791 {
4792 ix86_expand_vector_init (false, operands[0], operands[1]);
4793 DONE;
4794 })
4795
;; Expander for setting one element of a V16QI.  Operand 0 is the vector,
;; operand 1 the QImode value, operand 2 a constant element index.
;; Delegates to ix86_expand_vector_set (first argument false).
4796 (define_expand "vec_setv16qi"
4797 [(match_operand:V16QI 0 "register_operand" "")
4798 (match_operand:QI 1 "register_operand" "")
4799 (match_operand 2 "const_int_operand" "")]
4800 "TARGET_SSE"
4801 {
4802 ix86_expand_vector_set (false, operands[0], operands[1],
4803 INTVAL (operands[2]));
4804 DONE;
4805 })
4806
;; Expander for extracting one element of a V16QI.  Operand 0 is the QImode
;; result, operand 1 the vector, operand 2 a constant element index.
;; Delegates to ix86_expand_vector_extract (first argument false).
4807 (define_expand "vec_extractv16qi"
4808 [(match_operand:QI 0 "register_operand" "")
4809 (match_operand:V16QI 1 "register_operand" "")
4810 (match_operand 2 "const_int_operand" "")]
4811 "TARGET_SSE"
4812 {
4813 ix86_expand_vector_extract (false, operands[0], operands[1],
4814 INTVAL (operands[2]));
4815 DONE;
4816 })
4817
;; Expander for initializing a V16QI register (operand 0) from operand 1.
;; Delegates to ix86_expand_vector_init (first argument false).
4818 (define_expand "vec_initv16qi"
4819 [(match_operand:V16QI 0 "register_operand" "")
4820 (match_operand 1 "" "")]
4821 "TARGET_SSE"
4822 {
4823 ix86_expand_vector_init (false, operands[0], operands[1]);
4824 DONE;
4825 })
4826
;; Widen V16QI (operand 1) into V8HI (operand 0): "unpacku_hi" variant.
;; The helper is called with flags (true, true); the plain SSE helper is
;; used unless SSE4.1 is available, in which case the SSE4 helper is used.
4827 (define_expand "vec_unpacku_hi_v16qi"
4828 [(match_operand:V8HI 0 "register_operand" "")
4829 (match_operand:V16QI 1 "register_operand" "")]
4830 "TARGET_SSE2"
4831 {
4832 if (!TARGET_SSE4_1)
4833 ix86_expand_sse_unpack (operands, true, true);
4834 else
4835 ix86_expand_sse4_unpack (operands, true, true);
4836 DONE;
4837 })
4838
;; Widen V16QI (operand 1) into V8HI (operand 0): "unpacks_hi" variant.
;; The helper is called with flags (false, true); the plain SSE helper is
;; used unless SSE4.1 is available, in which case the SSE4 helper is used.
4839 (define_expand "vec_unpacks_hi_v16qi"
4840 [(match_operand:V8HI 0 "register_operand" "")
4841 (match_operand:V16QI 1 "register_operand" "")]
4842 "TARGET_SSE2"
4843 {
4844 if (!TARGET_SSE4_1)
4845 ix86_expand_sse_unpack (operands, false, true);
4846 else
4847 ix86_expand_sse4_unpack (operands, false, true);
4848 DONE;
4849 })
4850
;; Widen V16QI (operand 1) into V8HI (operand 0): "unpacku_lo" variant.
;; The helper is called with flags (true, false); the plain SSE helper is
;; used unless SSE4.1 is available, in which case the SSE4 helper is used.
4851 (define_expand "vec_unpacku_lo_v16qi"
4852 [(match_operand:V8HI 0 "register_operand" "")
4853 (match_operand:V16QI 1 "register_operand" "")]
4854 "TARGET_SSE2"
4855 {
4856 if (!TARGET_SSE4_1)
4857 ix86_expand_sse_unpack (operands, true, false);
4858 else
4859 ix86_expand_sse4_unpack (operands, true, false);
4860 DONE;
4861 })
4862
;; Widen V16QI (operand 1) into V8HI (operand 0): "unpacks_lo" variant.
;; The helper is called with flags (false, false); the plain SSE helper is
;; used unless SSE4.1 is available, in which case the SSE4 helper is used.
4863 (define_expand "vec_unpacks_lo_v16qi"
4864 [(match_operand:V8HI 0 "register_operand" "")
4865 (match_operand:V16QI 1 "register_operand" "")]
4866 "TARGET_SSE2"
4867 {
4868 if (!TARGET_SSE4_1)
4869 ix86_expand_sse_unpack (operands, false, false);
4870 else
4871 ix86_expand_sse4_unpack (operands, false, false);
4872 DONE;
4873 })
4874
;; Widen V8HI (operand 1) into V4SI (operand 0): "unpacku_hi" variant.
;; The helper is called with flags (true, true); the plain SSE helper is
;; used unless SSE4.1 is available, in which case the SSE4 helper is used.
4875 (define_expand "vec_unpacku_hi_v8hi"
4876 [(match_operand:V4SI 0 "register_operand" "")
4877 (match_operand:V8HI 1 "register_operand" "")]
4878 "TARGET_SSE2"
4879 {
4880 if (!TARGET_SSE4_1)
4881 ix86_expand_sse_unpack (operands, true, true);
4882 else
4883 ix86_expand_sse4_unpack (operands, true, true);
4884 DONE;
4885 })
4886
;; Widen V8HI (operand 1) into V4SI (operand 0): "unpacks_hi" variant.
;; The helper is called with flags (false, true); the plain SSE helper is
;; used unless SSE4.1 is available, in which case the SSE4 helper is used.
4887 (define_expand "vec_unpacks_hi_v8hi"
4888 [(match_operand:V4SI 0 "register_operand" "")
4889 (match_operand:V8HI 1 "register_operand" "")]
4890 "TARGET_SSE2"
4891 {
4892 if (!TARGET_SSE4_1)
4893 ix86_expand_sse_unpack (operands, false, true);
4894 else
4895 ix86_expand_sse4_unpack (operands, false, true);
4896 DONE;
4897 })
4898
;; Widen V8HI (operand 1) into V4SI (operand 0): "unpacku_lo" variant.
;; The helper is called with flags (true, false); the plain SSE helper is
;; used unless SSE4.1 is available, in which case the SSE4 helper is used.
4899 (define_expand "vec_unpacku_lo_v8hi"
4900 [(match_operand:V4SI 0 "register_operand" "")
4901 (match_operand:V8HI 1 "register_operand" "")]
4902 "TARGET_SSE2"
4903 {
4904 if (!TARGET_SSE4_1)
4905 ix86_expand_sse_unpack (operands, true, false);
4906 else
4907 ix86_expand_sse4_unpack (operands, true, false);
4908 DONE;
4909 })
4910
;; Widen V8HI (operand 1) into V4SI (operand 0): "unpacks_lo" variant.
;; The helper is called with flags (false, false); the plain SSE helper is
;; used unless SSE4.1 is available, in which case the SSE4 helper is used.
4911 (define_expand "vec_unpacks_lo_v8hi"
4912 [(match_operand:V4SI 0 "register_operand" "")
4913 (match_operand:V8HI 1 "register_operand" "")]
4914 "TARGET_SSE2"
4915 {
4916 if (!TARGET_SSE4_1)
4917 ix86_expand_sse_unpack (operands, false, false);
4918 else
4919 ix86_expand_sse4_unpack (operands, false, false);
4920 DONE;
4921 })
4922
;; Widen V4SI (operand 1) into V2DI (operand 0): "unpacku_hi" variant.
;; The helper is called with flags (true, true); the plain SSE helper is
;; used unless SSE4.1 is available, in which case the SSE4 helper is used.
4923 (define_expand "vec_unpacku_hi_v4si"
4924 [(match_operand:V2DI 0 "register_operand" "")
4925 (match_operand:V4SI 1 "register_operand" "")]
4926 "TARGET_SSE2"
4927 {
4928 if (!TARGET_SSE4_1)
4929 ix86_expand_sse_unpack (operands, true, true);
4930 else
4931 ix86_expand_sse4_unpack (operands, true, true);
4932 DONE;
4933 })
4934
;; Widen V4SI (operand 1) into V2DI (operand 0): "unpacks_hi" variant.
;; The helper is called with flags (false, true); the plain SSE helper is
;; used unless SSE4.1 is available, in which case the SSE4 helper is used.
4935 (define_expand "vec_unpacks_hi_v4si"
4936 [(match_operand:V2DI 0 "register_operand" "")
4937 (match_operand:V4SI 1 "register_operand" "")]
4938 "TARGET_SSE2"
4939 {
4940 if (!TARGET_SSE4_1)
4941 ix86_expand_sse_unpack (operands, false, true);
4942 else
4943 ix86_expand_sse4_unpack (operands, false, true);
4944 DONE;
4945 })
4946
;; Widen V4SI (operand 1) into V2DI (operand 0): "unpacku_lo" variant.
;; The helper is called with flags (true, false); the plain SSE helper is
;; used unless SSE4.1 is available, in which case the SSE4 helper is used.
4947 (define_expand "vec_unpacku_lo_v4si"
4948 [(match_operand:V2DI 0 "register_operand" "")
4949 (match_operand:V4SI 1 "register_operand" "")]
4950 "TARGET_SSE2"
4951 {
4952 if (!TARGET_SSE4_1)
4953 ix86_expand_sse_unpack (operands, true, false);
4954 else
4955 ix86_expand_sse4_unpack (operands, true, false);
4956 DONE;
4957 })
4958
;; Widen V4SI (operand 1) into V2DI (operand 0): "unpacks_lo" variant.
;; The helper is called with flags (false, false); the plain SSE helper is
;; used unless SSE4.1 is available, in which case the SSE4 helper is used.
4959 (define_expand "vec_unpacks_lo_v4si"
4960 [(match_operand:V2DI 0 "register_operand" "")
4961 (match_operand:V4SI 1 "register_operand" "")]
4962 "TARGET_SSE2"
4963 {
4964 if (!TARGET_SSE4_1)
4965 ix86_expand_sse_unpack (operands, false, false);
4966 else
4967 ix86_expand_sse4_unpack (operands, false, false);
4968 DONE;
4969 })
4970
4971 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4972 ;;
4973 ;; Miscellaneous
4974 ;;
4975 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4976
;; Unsigned byte average: both V16QI inputs are zero-extended to V16HI,
;; added together, a vector of ones is added, the sum is shifted right by
;; one and truncated back to V16QI.  Operand 1 is marked commutative ("%")
;; and tied to the destination.  Emits pavgb.
;; NOTE(review): the constant-1 vector is written as const_vector:V16QI
;; although it is an operand of plus:V16HI -- the modes look inconsistent;
;; confirm whether V16HI was intended before relying on this RTL shape.
4977 (define_insn "sse2_uavgv16qi3"
4978 [(set (match_operand:V16QI 0 "register_operand" "=x")
4979 (truncate:V16QI
4980 (lshiftrt:V16HI
4981 (plus:V16HI
4982 (plus:V16HI
4983 (zero_extend:V16HI
4984 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
4985 (zero_extend:V16HI
4986 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
4987 (const_vector:V16QI [(const_int 1) (const_int 1)
4988 (const_int 1) (const_int 1)
4989 (const_int 1) (const_int 1)
4990 (const_int 1) (const_int 1)
4991 (const_int 1) (const_int 1)
4992 (const_int 1) (const_int 1)
4993 (const_int 1) (const_int 1)
4994 (const_int 1) (const_int 1)]))
4995 (const_int 1))))]
4996 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
4997 "pavgb\t{%2, %0|%0, %2}"
4998 [(set_attr "type" "sseiadd")
4999 (set_attr "prefix_data16" "1")
5000 (set_attr "mode" "TI")])
5001
;; Unsigned halfword average: both V8HI inputs are zero-extended to V8SI,
;; added together, a vector of ones is added, the sum is shifted right by
;; one and truncated back to V8HI.  Operand 1 is marked commutative ("%")
;; and tied to the destination.  Emits pavgw.
;; NOTE(review): the constant-1 vector is written as const_vector:V8HI
;; although it is an operand of plus:V8SI -- the modes look inconsistent;
;; confirm whether V8SI was intended before relying on this RTL shape.
5002 (define_insn "sse2_uavgv8hi3"
5003 [(set (match_operand:V8HI 0 "register_operand" "=x")
5004 (truncate:V8HI
5005 (lshiftrt:V8SI
5006 (plus:V8SI
5007 (plus:V8SI
5008 (zero_extend:V8SI
5009 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5010 (zero_extend:V8SI
5011 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5012 (const_vector:V8HI [(const_int 1) (const_int 1)
5013 (const_int 1) (const_int 1)
5014 (const_int 1) (const_int 1)
5015 (const_int 1) (const_int 1)]))
5016 (const_int 1))))]
5017 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5018 "pavgw\t{%2, %0|%0, %2}"
5019 [(set_attr "type" "sseiadd")
5020 (set_attr "prefix_data16" "1")
5021 (set_attr "mode" "TI")])
5022
;; psadbw is modelled as an opaque UNSPEC_PSADBW over two V16QI inputs
;; producing a V2DI; operand 1 is tied to the destination and operand 2 may
;; be a register or memory.
5023 ;; The correct representation for this is absolutely enormous, and
5024 ;; surely not generally useful.
5025 (define_insn "sse2_psadbw"
5026 [(set (match_operand:V2DI 0 "register_operand" "=x")
5027 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5028 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5029 UNSPEC_PSADBW))]
5030 "TARGET_SSE2"
5031 "psadbw\t{%2, %0|%0, %2}"
5032 [(set_attr "type" "sseiadd")
5033 (set_attr "prefix_data16" "1")
5034 (set_attr "mode" "TI")])
5035
;; movmskps: SImode general-register result computed from a V4SF SSE
;; register, represented as UNSPEC_MOVMSK.
5036 (define_insn "sse_movmskps"
5037 [(set (match_operand:SI 0 "register_operand" "=r")
5038 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
5039 UNSPEC_MOVMSK))]
5040 "TARGET_SSE"
5041 "movmskps\t{%1, %0|%0, %1}"
5042 [(set_attr "type" "ssecvt")
5043 (set_attr "mode" "V4SF")])
5044
;; movmskpd: SImode general-register result computed from a V2DF SSE
;; register, represented as UNSPEC_MOVMSK.
5045 (define_insn "sse2_movmskpd"
5046 [(set (match_operand:SI 0 "register_operand" "=r")
5047 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
5048 UNSPEC_MOVMSK))]
5049 "TARGET_SSE2"
5050 "movmskpd\t{%1, %0|%0, %1}"
5051 [(set_attr "type" "ssecvt")
5052 (set_attr "mode" "V2DF")])
5053
;; pmovmskb: SImode general-register result computed from a V16QI SSE
;; register, represented as UNSPEC_MOVMSK.
5054 (define_insn "sse2_pmovmskb"
5055 [(set (match_operand:SI 0 "register_operand" "=r")
5056 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5057 UNSPEC_MOVMSK))]
5058 "TARGET_SSE2"
5059 "pmovmskb\t{%1, %0|%0, %1}"
5060 [(set_attr "type" "ssecvt")
5061 (set_attr "prefix_data16" "1")
5062 (set_attr "mode" "SI")])
5063
;; Named expander for maskmovdqu.  The V16QI memory destination (operand 0)
;; also appears as an input of the UNSPEC_MASKMOV alongside two V16QI
;; register operands.  There is no preparation code; the generated RTL is
;; left to be matched by the *sse2_maskmovdqu insns.
5064 (define_expand "sse2_maskmovdqu"
5065 [(set (match_operand:V16QI 0 "memory_operand" "")
5066 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5067 (match_operand:V16QI 2 "register_operand" "x")
5068 (match_dup 0)]
5069 UNSPEC_MASKMOV))]
5070 "TARGET_SSE2"
5071 "")
5072
;; 32-bit maskmovdqu.  The store address is an SImode register restricted
;; to constraint "D"; it is not printed in the template, which names only
;; the two V16QI register operands.
5073 (define_insn "*sse2_maskmovdqu"
5074 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5075 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5076 (match_operand:V16QI 2 "register_operand" "x")
5077 (mem:V16QI (match_dup 0))]
5078 UNSPEC_MASKMOV))]
5079 "TARGET_SSE2 && !TARGET_64BIT"
5080 ;; @@@ check ordering of operands in intel/nonintel syntax
5081 "maskmovdqu\t{%2, %1|%1, %2}"
5082 [(set_attr "type" "ssecvt")
5083 (set_attr "prefix_data16" "1")
5084 (set_attr "mode" "TI")])
5085
;; 64-bit maskmovdqu.  Identical to the 32-bit pattern except that the
;; address register (constraint "D") is DImode and the condition requires
;; TARGET_64BIT.
5086 (define_insn "*sse2_maskmovdqu_rex64"
5087 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5088 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5089 (match_operand:V16QI 2 "register_operand" "x")
5090 (mem:V16QI (match_dup 0))]
5091 UNSPEC_MASKMOV))]
5092 "TARGET_SSE2 && TARGET_64BIT"
5093 ;; @@@ check ordering of operands in intel/nonintel syntax
5094 "maskmovdqu\t{%2, %1|%1, %2}"
5095 [(set_attr "type" "ssecvt")
5096 (set_attr "prefix_data16" "1")
5097 (set_attr "mode" "TI")])
5098
;; ldmxcsr from an SImode memory operand.  Modelled as unspec_volatile
;; (UNSPECV_LDMXCSR) with no explicit destination in the RTL.
5099 (define_insn "sse_ldmxcsr"
5100 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5101 UNSPECV_LDMXCSR)]
5102 "TARGET_SSE"
5103 "ldmxcsr\t%0"
5104 [(set_attr "type" "sse")
5105 (set_attr "memory" "load")])
5106
;; stmxcsr into an SImode memory operand.  The stored value is modelled as
;; unspec_volatile UNSPECV_STMXCSR with a dummy (const_int 0) argument.
5107 (define_insn "sse_stmxcsr"
5108 [(set (match_operand:SI 0 "memory_operand" "=m")
5109 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5110 "TARGET_SSE"
5111 "stmxcsr\t%0"
5112 [(set_attr "type" "sse")
5113 (set_attr "memory" "store")])
5114
;; Expander for sfence.  The preparation code creates operand 0 as a
;; BLKmode MEM whose address is a Pmode SCRATCH and marks it volatile; the
;; fence is then expressed as a set of that MEM from an UNSPEC_SFENCE of
;; itself.
5115 (define_expand "sse_sfence"
5116 [(set (match_dup 0)
5117 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5118 "TARGET_SSE || TARGET_3DNOW_A"
5119 {
5120 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5121 MEM_VOLATILE_P (operands[0]) = 1;
5122 })
5123
;; Matcher for the RTL produced by the sse_sfence expander: a BLKmode
;; operand set from UNSPEC_SFENCE of itself.  Emits a bare "sfence"; the
;; memory attribute is "unknown".
5124 (define_insn "*sse_sfence"
5125 [(set (match_operand:BLK 0 "" "")
5126 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5127 "TARGET_SSE || TARGET_3DNOW_A"
5128 "sfence"
5129 [(set_attr "type" "sse")
5130 (set_attr "memory" "unknown")])
5131
;; clflush on an address operand (constraint "p"), printed with the %a
;; modifier.  Modelled as unspec_volatile UNSPECV_CLFLUSH.
5132 (define_insn "sse2_clflush"
5133 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5134 UNSPECV_CLFLUSH)]
5135 "TARGET_SSE2"
5136 "clflush\t%a0"
5137 [(set_attr "type" "sse")
5138 (set_attr "memory" "unknown")])
5139
;; Expander for mfence.  The preparation code creates operand 0 as a
;; volatile BLKmode MEM over a Pmode SCRATCH address; the fence is a set of
;; that MEM from an UNSPEC_MFENCE of itself.
5140 (define_expand "sse2_mfence"
5141 [(set (match_dup 0)
5142 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5143 "TARGET_SSE2"
5144 {
5145 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5146 MEM_VOLATILE_P (operands[0]) = 1;
5147 })
5148
;; Matcher for the RTL produced by the sse2_mfence expander.  Emits a bare
;; "mfence"; the memory attribute is "unknown".
5149 (define_insn "*sse2_mfence"
5150 [(set (match_operand:BLK 0 "" "")
5151 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5152 "TARGET_SSE2"
5153 "mfence"
5154 [(set_attr "type" "sse")
5155 (set_attr "memory" "unknown")])
5156
;; Expander for lfence.  The preparation code creates operand 0 as a
;; volatile BLKmode MEM over a Pmode SCRATCH address; the fence is a set of
;; that MEM from an UNSPEC_LFENCE of itself.
5157 (define_expand "sse2_lfence"
5158 [(set (match_dup 0)
5159 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5160 "TARGET_SSE2"
5161 {
5162 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5163 MEM_VOLATILE_P (operands[0]) = 1;
5164 })
5165
;; Matcher for the RTL produced by the sse2_lfence expander.  Emits a bare
;; "lfence"; the memory attribute is "unknown".
5166 (define_insn "*sse2_lfence"
5167 [(set (match_operand:BLK 0 "" "")
5168 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5169 "TARGET_SSE2"
5170 "lfence"
5171 [(set_attr "type" "sse")
5172 (set_attr "memory" "unknown")])
5173
;; mwait.  The two SImode inputs are pinned to register classes "a" and
;; "c"; the template prints the mnemonic with no explicit operands.  The
;; same pattern serves 32-bit and 64-bit targets (no TARGET_64BIT test).
5174 (define_insn "sse3_mwait"
5175 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5176 (match_operand:SI 1 "register_operand" "c")]
5177 UNSPECV_MWAIT)]
5178 "TARGET_SSE3"
5179 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5180 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5181 ;; we only need to set up 32bit registers.
5182 "mwait"
5183 [(set_attr "length" "3")])
5184
;; 32-bit monitor.  Three SImode inputs pinned to register classes "a",
;; "c" and "d"; unlike the 64-bit variant, the template prints all three
;; operands explicitly.
5185 (define_insn "sse3_monitor"
5186 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5187 (match_operand:SI 1 "register_operand" "c")
5188 (match_operand:SI 2 "register_operand" "d")]
5189 UNSPECV_MONITOR)]
5190 "TARGET_SSE3 && !TARGET_64BIT"
5191 "monitor\t%0, %1, %2"
5192 [(set_attr "length" "3")])
5193
;; 64-bit monitor.  Operand 0 is DImode (class "a"); operands 1 and 2 stay
;; SImode (classes "c" and "d").  The template prints the bare mnemonic
;; with no explicit operands.
5194 (define_insn "sse3_monitor64"
5195 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5196 (match_operand:SI 1 "register_operand" "c")
5197 (match_operand:SI 2 "register_operand" "d")]
5198 UNSPECV_MONITOR)]
5199 "TARGET_SSE3 && TARGET_64BIT"
5200 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5201 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5202 ;; zero extended to 64bit, we only need to set up 32bit registers.
5203 "monitor"
5204 [(set_attr "length" "3")])
5205
5206 ;; SSSE3
;; 128-bit horizontal add of halfwords (phaddw).  Every result element is
;; plus:HI of two adjacent elements (0+1, 2+3, 4+5, 6+7) of one input: the
;; first V4HI half of the vec_concat is built from operand 1, the second
;; from operand 2.  Operand 1 is tied to the destination.
5207 (define_insn "ssse3_phaddwv8hi3"
5208 [(set (match_operand:V8HI 0 "register_operand" "=x")
5209 (vec_concat:V8HI
5210 (vec_concat:V4HI
5211 (vec_concat:V2HI
5212 (plus:HI
5213 (vec_select:HI
5214 (match_operand:V8HI 1 "register_operand" "0")
5215 (parallel [(const_int 0)]))
5216 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5217 (plus:HI
5218 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5219 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5220 (vec_concat:V2HI
5221 (plus:HI
5222 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5223 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5224 (plus:HI
5225 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5226 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5227 (vec_concat:V4HI
5228 (vec_concat:V2HI
5229 (plus:HI
5230 (vec_select:HI
5231 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5232 (parallel [(const_int 0)]))
5233 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5234 (plus:HI
5235 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5236 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5237 (vec_concat:V2HI
5238 (plus:HI
5239 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5240 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5241 (plus:HI
5242 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5243 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5244 "TARGET_SSSE3"
5245 "phaddw\t{%2, %0|%0, %2}"
5246 [(set_attr "type" "sseiadd")
5247 (set_attr "prefix_data16" "1")
5248 (set_attr "prefix_extra" "1")
5249 (set_attr "mode" "TI")])
5250
;; 64-bit (MMX register, constraint "y") horizontal add of halfwords
;; (phaddw).  Result is the pairwise sums 0+1, 2+3 of operand 1 followed by
;; the same pairs of operand 2.  Operand 1 is tied to the destination.
5251 (define_insn "ssse3_phaddwv4hi3"
5252 [(set (match_operand:V4HI 0 "register_operand" "=y")
5253 (vec_concat:V4HI
5254 (vec_concat:V2HI
5255 (plus:HI
5256 (vec_select:HI
5257 (match_operand:V4HI 1 "register_operand" "0")
5258 (parallel [(const_int 0)]))
5259 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5260 (plus:HI
5261 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5262 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5263 (vec_concat:V2HI
5264 (plus:HI
5265 (vec_select:HI
5266 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5267 (parallel [(const_int 0)]))
5268 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5269 (plus:HI
5270 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5271 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5272 "TARGET_SSSE3"
5273 "phaddw\t{%2, %0|%0, %2}"
5274 [(set_attr "type" "sseiadd")
5275 (set_attr "prefix_extra" "1")
5276 (set_attr "mode" "DI")])
5277
;; 128-bit horizontal add of doublewords (phaddd).  Result is the pairwise
;; sums 0+1, 2+3 of operand 1 followed by the same pairs of operand 2.
;; Operand 1 is tied to the destination.
5278 (define_insn "ssse3_phadddv4si3"
5279 [(set (match_operand:V4SI 0 "register_operand" "=x")
5280 (vec_concat:V4SI
5281 (vec_concat:V2SI
5282 (plus:SI
5283 (vec_select:SI
5284 (match_operand:V4SI 1 "register_operand" "0")
5285 (parallel [(const_int 0)]))
5286 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5287 (plus:SI
5288 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5289 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5290 (vec_concat:V2SI
5291 (plus:SI
5292 (vec_select:SI
5293 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5294 (parallel [(const_int 0)]))
5295 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5296 (plus:SI
5297 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5298 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5299 "TARGET_SSSE3"
5300 "phaddd\t{%2, %0|%0, %2}"
5301 [(set_attr "type" "sseiadd")
5302 (set_attr "prefix_data16" "1")
5303 (set_attr "prefix_extra" "1")
5304 (set_attr "mode" "TI")])
5305
;; 64-bit (MMX register) horizontal add of doublewords (phaddd).  Result
;; element 0 is the sum of both elements of operand 1, element 1 the sum
;; of both elements of operand 2.  Operand 1 is tied to the destination.
5306 (define_insn "ssse3_phadddv2si3"
5307 [(set (match_operand:V2SI 0 "register_operand" "=y")
5308 (vec_concat:V2SI
5309 (plus:SI
5310 (vec_select:SI
5311 (match_operand:V2SI 1 "register_operand" "0")
5312 (parallel [(const_int 0)]))
5313 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5314 (plus:SI
5315 (vec_select:SI
5316 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5317 (parallel [(const_int 0)]))
5318 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5319 "TARGET_SSSE3"
5320 "phaddd\t{%2, %0|%0, %2}"
5321 [(set_attr "type" "sseiadd")
5322 (set_attr "prefix_extra" "1")
5323 (set_attr "mode" "DI")])
5324
;; 128-bit horizontal add of halfwords using ss_plus (phaddsw).  Same
;; element pairing as ssse3_phaddwv8hi3: adjacent pairs of operand 1 fill
;; the first V4HI half, adjacent pairs of operand 2 the second.  Operand 1
;; is tied to the destination.
5325 (define_insn "ssse3_phaddswv8hi3"
5326 [(set (match_operand:V8HI 0 "register_operand" "=x")
5327 (vec_concat:V8HI
5328 (vec_concat:V4HI
5329 (vec_concat:V2HI
5330 (ss_plus:HI
5331 (vec_select:HI
5332 (match_operand:V8HI 1 "register_operand" "0")
5333 (parallel [(const_int 0)]))
5334 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5335 (ss_plus:HI
5336 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5337 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5338 (vec_concat:V2HI
5339 (ss_plus:HI
5340 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5341 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5342 (ss_plus:HI
5343 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5344 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5345 (vec_concat:V4HI
5346 (vec_concat:V2HI
5347 (ss_plus:HI
5348 (vec_select:HI
5349 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5350 (parallel [(const_int 0)]))
5351 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5352 (ss_plus:HI
5353 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5354 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5355 (vec_concat:V2HI
5356 (ss_plus:HI
5357 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5358 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5359 (ss_plus:HI
5360 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5361 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5362 "TARGET_SSSE3"
5363 "phaddsw\t{%2, %0|%0, %2}"
5364 [(set_attr "type" "sseiadd")
5365 (set_attr "prefix_data16" "1")
5366 (set_attr "prefix_extra" "1")
5367 (set_attr "mode" "TI")])
5368
;; 64-bit (MMX register) horizontal add of halfwords using ss_plus
;; (phaddsw).  Pairs 0+1, 2+3 of operand 1 followed by the same pairs of
;; operand 2.  Operand 1 is tied to the destination.
5369 (define_insn "ssse3_phaddswv4hi3"
5370 [(set (match_operand:V4HI 0 "register_operand" "=y")
5371 (vec_concat:V4HI
5372 (vec_concat:V2HI
5373 (ss_plus:HI
5374 (vec_select:HI
5375 (match_operand:V4HI 1 "register_operand" "0")
5376 (parallel [(const_int 0)]))
5377 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5378 (ss_plus:HI
5379 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5380 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5381 (vec_concat:V2HI
5382 (ss_plus:HI
5383 (vec_select:HI
5384 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5385 (parallel [(const_int 0)]))
5386 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5387 (ss_plus:HI
5388 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5389 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5390 "TARGET_SSSE3"
5391 "phaddsw\t{%2, %0|%0, %2}"
5392 [(set_attr "type" "sseiadd")
5393 (set_attr "prefix_extra" "1")
5394 (set_attr "mode" "DI")])
5395
;; 128-bit horizontal subtract of halfwords (phsubw).  Every result element
;; is minus:HI of an even-indexed element and the following odd-indexed one
;; (0-1, 2-3, 4-5, 6-7); operand 1 supplies the first V4HI half, operand 2
;; the second.  Operand 1 is tied to the destination.
5396 (define_insn "ssse3_phsubwv8hi3"
5397 [(set (match_operand:V8HI 0 "register_operand" "=x")
5398 (vec_concat:V8HI
5399 (vec_concat:V4HI
5400 (vec_concat:V2HI
5401 (minus:HI
5402 (vec_select:HI
5403 (match_operand:V8HI 1 "register_operand" "0")
5404 (parallel [(const_int 0)]))
5405 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5406 (minus:HI
5407 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5408 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5409 (vec_concat:V2HI
5410 (minus:HI
5411 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5412 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5413 (minus:HI
5414 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5415 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5416 (vec_concat:V4HI
5417 (vec_concat:V2HI
5418 (minus:HI
5419 (vec_select:HI
5420 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5421 (parallel [(const_int 0)]))
5422 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5423 (minus:HI
5424 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5425 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5426 (vec_concat:V2HI
5427 (minus:HI
5428 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5429 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5430 (minus:HI
5431 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5432 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5433 "TARGET_SSSE3"
5434 "phsubw\t{%2, %0|%0, %2}"
5435 [(set_attr "type" "sseiadd")
5436 (set_attr "prefix_data16" "1")
5437 (set_attr "prefix_extra" "1")
5438 (set_attr "mode" "TI")])
5439
;; 64-bit (MMX register) horizontal subtract of halfwords (phsubw).
;; Differences 0-1, 2-3 of operand 1 followed by the same pairs of
;; operand 2.  Operand 1 is tied to the destination.
5440 (define_insn "ssse3_phsubwv4hi3"
5441 [(set (match_operand:V4HI 0 "register_operand" "=y")
5442 (vec_concat:V4HI
5443 (vec_concat:V2HI
5444 (minus:HI
5445 (vec_select:HI
5446 (match_operand:V4HI 1 "register_operand" "0")
5447 (parallel [(const_int 0)]))
5448 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5449 (minus:HI
5450 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5451 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5452 (vec_concat:V2HI
5453 (minus:HI
5454 (vec_select:HI
5455 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5456 (parallel [(const_int 0)]))
5457 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5458 (minus:HI
5459 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5460 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5461 "TARGET_SSSE3"
5462 "phsubw\t{%2, %0|%0, %2}"
5463 [(set_attr "type" "sseiadd")
5464 (set_attr "prefix_extra" "1")
5465 (set_attr "mode" "DI")])
5466
;; 128-bit horizontal subtract of doublewords (phsubd).  Differences 0-1,
;; 2-3 of operand 1 followed by the same pairs of operand 2.  Operand 1 is
;; tied to the destination.
5467 (define_insn "ssse3_phsubdv4si3"
5468 [(set (match_operand:V4SI 0 "register_operand" "=x")
5469 (vec_concat:V4SI
5470 (vec_concat:V2SI
5471 (minus:SI
5472 (vec_select:SI
5473 (match_operand:V4SI 1 "register_operand" "0")
5474 (parallel [(const_int 0)]))
5475 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5476 (minus:SI
5477 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5478 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5479 (vec_concat:V2SI
5480 (minus:SI
5481 (vec_select:SI
5482 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5483 (parallel [(const_int 0)]))
5484 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5485 (minus:SI
5486 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5487 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5488 "TARGET_SSSE3"
5489 "phsubd\t{%2, %0|%0, %2}"
5490 [(set_attr "type" "sseiadd")
5491 (set_attr "prefix_data16" "1")
5492 (set_attr "prefix_extra" "1")
5493 (set_attr "mode" "TI")])
5494
;; 64-bit (MMX register) horizontal subtract of doublewords (phsubd).
;; Result element 0 is element 0 minus element 1 of operand 1; element 1 is
;; the same difference for operand 2.  Operand 1 is tied to the destination.
5495 (define_insn "ssse3_phsubdv2si3"
5496 [(set (match_operand:V2SI 0 "register_operand" "=y")
5497 (vec_concat:V2SI
5498 (minus:SI
5499 (vec_select:SI
5500 (match_operand:V2SI 1 "register_operand" "0")
5501 (parallel [(const_int 0)]))
5502 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5503 (minus:SI
5504 (vec_select:SI
5505 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5506 (parallel [(const_int 0)]))
5507 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5508 "TARGET_SSSE3"
5509 "phsubd\t{%2, %0|%0, %2}"
5510 [(set_attr "type" "sseiadd")
5511 (set_attr "prefix_extra" "1")
5512 (set_attr "mode" "DI")])
5513
;; 128-bit horizontal subtract of halfwords using ss_minus (phsubsw).
;; Same element pairing as ssse3_phsubwv8hi3: differences 0-1, 2-3, 4-5,
;; 6-7 of operand 1 fill the first V4HI half, those of operand 2 the
;; second.  Operand 1 is tied to the destination.
5514 (define_insn "ssse3_phsubswv8hi3"
5515 [(set (match_operand:V8HI 0 "register_operand" "=x")
5516 (vec_concat:V8HI
5517 (vec_concat:V4HI
5518 (vec_concat:V2HI
5519 (ss_minus:HI
5520 (vec_select:HI
5521 (match_operand:V8HI 1 "register_operand" "0")
5522 (parallel [(const_int 0)]))
5523 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5524 (ss_minus:HI
5525 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5526 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5527 (vec_concat:V2HI
5528 (ss_minus:HI
5529 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5530 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5531 (ss_minus:HI
5532 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5533 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5534 (vec_concat:V4HI
5535 (vec_concat:V2HI
5536 (ss_minus:HI
5537 (vec_select:HI
5538 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5539 (parallel [(const_int 0)]))
5540 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5541 (ss_minus:HI
5542 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5543 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5544 (vec_concat:V2HI
5545 (ss_minus:HI
5546 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5547 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5548 (ss_minus:HI
5549 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5550 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5551 "TARGET_SSSE3"
5552 "phsubsw\t{%2, %0|%0, %2}"
5553 [(set_attr "type" "sseiadd")
5554 (set_attr "prefix_data16" "1")
5555 (set_attr "prefix_extra" "1")
5556 (set_attr "mode" "TI")])
5557
;; 64-bit (MMX register) horizontal subtract of halfwords using ss_minus
;; (phsubsw).  Differences 0-1, 2-3 of operand 1 followed by the same pairs
;; of operand 2.  Operand 1 is tied to the destination.
5558 (define_insn "ssse3_phsubswv4hi3"
5559 [(set (match_operand:V4HI 0 "register_operand" "=y")
5560 (vec_concat:V4HI
5561 (vec_concat:V2HI
5562 (ss_minus:HI
5563 (vec_select:HI
5564 (match_operand:V4HI 1 "register_operand" "0")
5565 (parallel [(const_int 0)]))
5566 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5567 (ss_minus:HI
5568 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5569 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5570 (vec_concat:V2HI
5571 (ss_minus:HI
5572 (vec_select:HI
5573 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5574 (parallel [(const_int 0)]))
5575 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5576 (ss_minus:HI
5577 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5578 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5579 "TARGET_SSSE3"
5580 "phsubsw\t{%2, %0|%0, %2}"
5581 [(set_attr "type" "sseiadd")
5582 (set_attr "prefix_extra" "1")
5583 (set_attr "mode" "DI")])
5584
;; pmaddubsw, 128-bit form.  The bytes of operand 1 are zero-extended
;; and the bytes of operand 2 are sign-extended; the products of the
;; even-indexed bytes and of the odd-indexed bytes are added with signed
;; saturation.  Since the two inputs are extended differently the
;; operation is NOT commutative: operand 1 is the tied destination
;; register and must not carry the "%" modifier.  Every vec_select
;; picks 8 of the 16 bytes, hence mode V8QI.
(define_insn "ssse3_pmaddubswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (ss_plus:V8HI
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 1 "register_operand" "0")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)])))
            (sign_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)]))))
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)])))
            (sign_extend:V8HI
              (vec_select:V8QI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
5638
;; pmaddubsw, 64-bit form.  Operand 1 bytes are zero-extended, operand 2
;; bytes are sign-extended; even and odd byte products are added with
;; signed saturation.  The asymmetric extension makes the operation
;; non-commutative, so operand 1 is the tied destination register
;; without the "%" modifier.  Every vec_select picks 4 of the 8 bytes,
;; hence mode V4QI.
(define_insn "ssse3_pmaddubswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (ss_plus:V4HI
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 1 "register_operand" "0")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)])))
            (sign_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)]))))
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4HI
              (vec_select:V4QI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "DI")])
5675
;; pmulhrsw, 128-bit form: widen both inputs to 32 bits, multiply,
;; shift right by 14, add 1, shift right by 1 and truncate to 16 bits.
;; NOTE(review): the rounding constant is spelled const_vector:V8HI
;; although it is an operand of plus:V8SI -- confirm this is intended.
(define_insn "ssse3_pmulhrswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (lshiftrt:V8SI
                (mult:V8SI
                  (sign_extend:V8SI
                    (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
                  (sign_extend:V8SI
                    (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
                (const_int 14))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
5699
;; pmulhrsw, 64-bit form: widen both inputs to 32 bits, multiply,
;; shift right by 14, add 1, shift right by 1 and truncate to 16 bits.
;; NOTE(review): the rounding constant is spelled const_vector:V4HI
;; although it is an operand of plus:V4SI -- confirm this is intended.
(define_insn "ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (truncate:V4HI
          (lshiftrt:V4SI
            (plus:V4SI
              (lshiftrt:V4SI
                (mult:V4SI
                  (sign_extend:V4SI
                    (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
                  (sign_extend:V4SI
                    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
                (const_int 14))
              (const_vector:V4HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "DI")])
5720
;; pshufb, 128-bit form.  Modelled as UNSPEC_PSHUFB of operand 1 (tied
;; to the destination) and operand 2 (register or memory).
(define_insn "ssse3_pshufbv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
                       (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
                      UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
5732
;; pshufb, 64-bit form.  Modelled as UNSPEC_PSHUFB of operand 1 (tied
;; to the destination) and operand 2 (register or memory).
(define_insn "ssse3_pshufbv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
                      (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
                     UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "DI")])
5743
;; psignb/psignw/psignd, 128-bit forms, one per mode in SSEMODE124.
;; Modelled as UNSPEC_PSIGN; the mnemonic suffix comes from <ssevecsize>.
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (unspec:SSEMODE124
          [(match_operand:SSEMODE124 1 "register_operand" "0")
           (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "psign<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
5755
;; psign, 64-bit forms, one per mode in MMXMODEI.  Modelled as
;; UNSPEC_PSIGN; the mnemonic suffix comes from <mmxvecsize>.
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (unspec:MMXMODEI
          [(match_operand:MMXMODEI 1 "register_operand" "0")
           (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "psign<mmxvecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "DI")])
5766
;; palignr, 128-bit form (UNSPEC_PALIGNR).  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; it is divided by 8 before the
;; instruction is printed.
(define_insn "ssse3_palignrti"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (unspec:TI
          [(match_operand:TI 1 "register_operand" "0")
           (match_operand:TI 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
          UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* Rescale the immediate in place, then emit the instruction.  */
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
5782
;; palignr, 64-bit form (UNSPEC_PALIGNR).  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; it is divided by 8 before the
;; instruction is printed.
(define_insn "ssse3_palignrdi"
  [(set (match_operand:DI 0 "register_operand" "=y")
        (unspec:DI
          [(match_operand:DI 1 "register_operand" "0")
           (match_operand:DI 2 "nonimmediate_operand" "ym")
           (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
          UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* Rescale the immediate in place, then emit the instruction.  */
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "DI")])
5797
;; pabsb/pabsw/pabsd, 128-bit forms: element-wise absolute value for
;; each mode in SSEMODE124.  The source may be a register or memory.
(define_insn "abs<mode>2"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (abs:SSEMODE124
          (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSSE3"
  "pabs<ssevecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
5807
;; pabs, 64-bit forms: element-wise absolute value for each mode in
;; MMXMODEI.  The source may be a register or memory.
(define_insn "abs<mode>2"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (abs:MMXMODEI
          (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
  "TARGET_SSSE3"
  "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "DI")])
5816
5817 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5818 ;;
5819 ;; AMD SSE4A instructions
5820 ;;
5821 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5822
;; movntsd: store element 0 of V2DF operand 1 to DFmode memory,
;; wrapped in UNSPEC_MOVNT.
(define_insn "sse4a_vmmovntv2df"
  [(set (match_operand:DF 0 "memory_operand" "=m")
        (unspec:DF
          [(vec_select:DF
             (match_operand:V2DF 1 "register_operand" "x")
             (parallel [(const_int 0)]))]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movntsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "DF")])
5833
;; movntsd: store DFmode register operand 1 to memory, wrapped in
;; UNSPEC_MOVNT.
(define_insn "sse4a_movntdf"
  [(set (match_operand:DF 0 "memory_operand" "=m")
        (unspec:DF
          [(match_operand:DF 1 "register_operand" "x")]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movntsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "DF")])
5842
;; movntss: store element 0 of V4SF operand 1 to SFmode memory,
;; wrapped in UNSPEC_MOVNT.
(define_insn "sse4a_vmmovntv4sf"
  [(set (match_operand:SF 0 "memory_operand" "=m")
        (unspec:SF
          [(vec_select:SF
             (match_operand:V4SF 1 "register_operand" "x")
             (parallel [(const_int 0)]))]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movntss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "SF")])
5853
;; movntss: store SFmode register operand 1 to memory, wrapped in
;; UNSPEC_MOVNT.
(define_insn "sse4a_movntsf"
  [(set (match_operand:SF 0 "memory_operand" "=m")
        (unspec:SF
          [(match_operand:SF 1 "register_operand" "x")]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movntss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "SF")])
5862
;; extrq with two immediate operands (UNSPEC_EXTRQI).  Operands 2 and 3
;; are mode-less integer constants; operand 1 is tied to the destination.
(define_insn "sse4a_extrqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand 2 "const_int_operand" "")
           (match_operand 3 "const_int_operand" "")]
          UNSPEC_EXTRQI))]
  "TARGET_SSE4A"
  "extrq\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
5874
;; extrq with a register control operand (UNSPEC_EXTRQ).  Operand 1 is
;; tied to the destination; operand 2 is a V16QI register.
(define_insn "sse4a_extrq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V16QI 2 "register_operand" "x")]
          UNSPEC_EXTRQ))]
  "TARGET_SSE4A"
  "extrq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
5885
;; insertq with two immediate operands (UNSPEC_INSERTQI).  Operands 3
;; and 4 are mode-less integer constants; operand 1 is tied to the
;; destination and operand 2 is a second V2DI register.
(define_insn "sse4a_insertqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "register_operand" "x")
           (match_operand 3 "const_int_operand" "")
           (match_operand 4 "const_int_operand" "")]
          UNSPEC_INSERTQI))]
  "TARGET_SSE4A"
  "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
  [(set_attr "type" "sseins")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "TI")])
5898
;; insertq, register-only form (UNSPEC_INSERTQ).  Operand 1 is tied to
;; the destination; operand 2 is a second V2DI register.
(define_insn "sse4a_insertq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "register_operand" "x")]
          UNSPEC_INSERTQ))]
  "TARGET_SSE4A"
  "insertq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseins")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "TI")])
5909
5910 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5911 ;;
5912 ;; Intel SSE4.1 instructions
5913 ;;
5914 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5915
;; blendpd: vec_merge of operand 2 and operand 1 (tied to the
;; destination) under the immediate mask in operand 3 (0..3).
(define_insn "sse4_1_blendpd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")
          (match_operand:V2DF 1 "register_operand" "0")
          (match_operand:SI 3 "const_0_to_3_operand" "n")))]
  "TARGET_SSE4_1"
  "blendpd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V2DF")])
5927
;; blendps: vec_merge of operand 2 and operand 1 (tied to the
;; destination) under the immediate mask in operand 3 (0..15).
(define_insn "sse4_1_blendps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")
          (match_operand:V4SF 1 "register_operand" "0")
          (match_operand:SI 3 "const_0_to_15_operand" "n")))]
  "TARGET_SSE4_1"
  "blendps\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V4SF")])
5939
;; blendvpd (UNSPEC_BLENDV).  Operand 3 uses the "Y0" constraint, while
;; operands 0-2 use the *_not_xmm0 predicates.
(define_insn "sse4_1_blendvpd"
  [(set (match_operand:V2DF 0 "reg_not_xmm0_operand" "=x")
        (unspec:V2DF
          [(match_operand:V2DF 1 "reg_not_xmm0_operand" "0")
           (match_operand:V2DF 2 "nonimm_not_xmm0_operand" "xm")
           (match_operand:V2DF 3 "register_operand" "Y0")]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "blendvpd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V2DF")])
5951
;; blendvps (UNSPEC_BLENDV).  Operand 3 uses the "Y0" constraint, while
;; operands 0-2 use the *_not_xmm0 predicates.
(define_insn "sse4_1_blendvps"
  [(set (match_operand:V4SF 0 "reg_not_xmm0_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "reg_not_xmm0_operand" "0")
           (match_operand:V4SF 2 "nonimm_not_xmm0_operand" "xm")
           (match_operand:V4SF 3 "register_operand" "Y0")]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "blendvps\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V4SF")])
5963
;; dppd (UNSPEC_DP).  Operands 1 and 2 are marked commutative ("%");
;; operand 3 is an immediate in 0..255.
(define_insn "sse4_1_dppd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (unspec:V2DF
          [(match_operand:V2DF 1 "nonimmediate_operand" "%0")
           (match_operand:V2DF 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_DP))]
  "TARGET_SSE4_1"
  "dppd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemul")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V2DF")])
5975
;; dpps (UNSPEC_DP).  Operands 1 and 2 are marked commutative ("%");
;; operand 3 is an immediate in 0..255.
(define_insn "sse4_1_dpps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "nonimmediate_operand" "%0")
           (match_operand:V4SF 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_DP))]
  "TARGET_SSE4_1"
  "dpps\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemul")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V4SF")])
5987
;; movntdqa: load V2DI memory operand 1 into a register, wrapped in
;; UNSPEC_MOVNTDQA.
(define_insn "sse4_1_movntdqa"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "memory_operand" "m")]
          UNSPEC_MOVNTDQA))]
  "TARGET_SSE4_1"
  "movntdqa\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
5997
;; mpsadbw (UNSPEC_MPSADBW).  Operand 1 is tied to the destination,
;; operand 2 is register or memory, operand 3 is an immediate in 0..255.
(define_insn "sse4_1_mpsadbw"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "0")
           (match_operand:V16QI 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_MPSADBW))]
  "TARGET_SSE4_1"
  "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6009
;; packusdw: unsigned-saturating truncation of two V4SI inputs to V4HI,
;; concatenated with operand 1 supplying the low half.
(define_insn "sse4_1_packusdw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (us_truncate:V4HI
            (match_operand:V4SI 1 "register_operand" "0"))
          (us_truncate:V4HI
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE4_1"
  "packusdw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6022
;; pblendvb (UNSPEC_BLENDV).  Operand 3 uses the "Y0" constraint, while
;; operands 0-2 use the *_not_xmm0 predicates.
(define_insn "sse4_1_pblendvb"
  [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
        (unspec:V16QI
          [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
           (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
           (match_operand:V16QI 3 "register_operand" "Y0")]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "pblendvb\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6034
;; pblendw: vec_merge of operand 2 and operand 1 (tied to the
;; destination) under the immediate mask in operand 3 (0..255).
(define_insn "sse4_1_pblendw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_merge:V8HI
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")
          (match_operand:V8HI 1 "register_operand" "0")
          (match_operand:SI 3 "const_0_to_255_operand" "n")))]
  "TARGET_SSE4_1"
  "pblendw\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6046
;; phminposuw (UNSPEC_PHMINPOSUW) on a single V8HI register-or-memory
;; source.
(define_insn "sse4_1_phminposuw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (unspec:V8HI
          [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
          UNSPEC_PHMINPOSUW))]
  "TARGET_SSE4_1"
  "phminposuw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6056
;; pmovsxbw, register source: sign-extend bytes 0-7 of V16QI operand 1
;; to eight words.
(define_insn "sse4_1_extendv8qiv8hi2"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (sign_extend:V8HI
          (vec_select:V8QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1"
  "pmovsxbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6075
;; pmovsxbw, narrow source: V8QI operand 1 (register or memory) is
;; wrapped in vec_duplicate:V16QI and elements 0-7 are sign-extended.
(define_insn "*sse4_1_extendv8qiv8hi2"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (sign_extend:V8HI
          (vec_select:V8QI
            (vec_duplicate:V16QI
              (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1"
  "pmovsxbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6095
;; pmovsxbd, register source: sign-extend bytes 0-3 of V16QI operand 1
;; to four doublewords.
(define_insn "sse4_1_extendv4qiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (sign_extend:V4SI
          (vec_select:V4QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "pmovsxbd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6110
;; pmovsxbd, narrow source: V4QI operand 1 (register or memory) is
;; wrapped in vec_duplicate:V16QI and elements 0-3 are sign-extended.
(define_insn "*sse4_1_extendv4qiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (sign_extend:V4SI
          (vec_select:V4QI
            (vec_duplicate:V16QI
              (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "pmovsxbd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6126
;; pmovsxbq, register source: sign-extend bytes 0-1 of V16QI operand 1
;; to two quadwords.
(define_insn "sse4_1_extendv2qiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (sign_extend:V2DI
          (vec_select:V2QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovsxbq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6139
;; pmovsxbq, narrow source: V2QI operand 1 (register or memory) is
;; wrapped in vec_duplicate:V16QI and elements 0-1 are sign-extended.
(define_insn "*sse4_1_extendv2qiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (sign_extend:V2DI
          (vec_select:V2QI
            (vec_duplicate:V16QI
              (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovsxbq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6153
;; pmovsxwd, register source: sign-extend words 0-3 of V8HI operand 1
;; to four doublewords.
(define_insn "sse4_1_extendv4hiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (sign_extend:V4SI
          (vec_select:V4HI
            (match_operand:V8HI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "pmovsxwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6168
;; pmovsxwd, narrow source: V4HI operand 1 (register or memory) is
;; wrapped in vec_duplicate:V8HI and elements 0-3 are sign-extended.
;; The source must be V4HI: four words are selected, and this matches
;; the corresponding zero-extend pattern.
(define_insn "*sse4_1_extendv4hiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (sign_extend:V4SI
          (vec_select:V4HI
            (vec_duplicate:V8HI
              (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "pmovsxwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6184
;; pmovsxwq, register source: sign-extend words 0-1 of V8HI operand 1
;; to two quadwords.
(define_insn "sse4_1_extendv2hiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (sign_extend:V2DI
          (vec_select:V2HI
            (match_operand:V8HI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovsxwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6197
;; pmovsxwq, narrow source: V2HI operand 1 (register or memory) is
;; wrapped in vec_duplicate:V8HI and elements 0-1 are sign-extended.
;; The source must be V2HI: two words are selected, and this matches
;; the corresponding zero-extend pattern.
(define_insn "*sse4_1_extendv2hiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (sign_extend:V2DI
          (vec_select:V2HI
            (vec_duplicate:V8HI
              (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovsxwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6211
;; pmovsxdq, register source: sign-extend doublewords 0-1 of V4SI
;; operand 1 to two quadwords.
(define_insn "sse4_1_extendv2siv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (sign_extend:V2DI
          (vec_select:V2SI
            (match_operand:V4SI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovsxdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6224
;; pmovsxdq, narrow source: V2SI operand 1 (register or memory) is
;; wrapped in vec_duplicate:V4SI and elements 0-1 are sign-extended.
(define_insn "*sse4_1_extendv2siv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (sign_extend:V2DI
          (vec_select:V2SI
            (vec_duplicate:V4SI
              (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovsxdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6238
;; pmovzxbw, register source: zero-extend bytes 0-7 of V16QI operand 1
;; to eight words.
(define_insn "sse4_1_zero_extendv8qiv8hi2"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (zero_extend:V8HI
          (vec_select:V8QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1"
  "pmovzxbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6257
;; pmovzxbw, narrow source: V8QI operand 1 (register or memory) is
;; wrapped in vec_duplicate:V16QI and elements 0-7 are zero-extended.
(define_insn "*sse4_1_zero_extendv8qiv8hi2"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (zero_extend:V8HI
          (vec_select:V8QI
            (vec_duplicate:V16QI
              (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1"
  "pmovzxbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6277
;; pmovzxbd, register source: zero-extend bytes 0-3 of V16QI operand 1
;; to four doublewords.
(define_insn "sse4_1_zero_extendv4qiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (zero_extend:V4SI
          (vec_select:V4QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "pmovzxbd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6292
;; pmovzxbd, narrow source: V4QI operand 1 (register or memory) is
;; wrapped in vec_duplicate:V16QI and elements 0-3 are zero-extended.
(define_insn "*sse4_1_zero_extendv4qiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (zero_extend:V4SI
          (vec_select:V4QI
            (vec_duplicate:V16QI
              (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "pmovzxbd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6308
;; pmovzxbq, register source: zero-extend bytes 0-1 of V16QI operand 1
;; to two quadwords.
(define_insn "sse4_1_zero_extendv2qiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (zero_extend:V2DI
          (vec_select:V2QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovzxbq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6321
;; pmovzxbq, narrow source: V2QI operand 1 (register or memory) is
;; wrapped in vec_duplicate:V16QI and elements 0-1 are zero-extended.
(define_insn "*sse4_1_zero_extendv2qiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (zero_extend:V2DI
          (vec_select:V2QI
            (vec_duplicate:V16QI
              (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovzxbq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6335
;; pmovzxwd, register source: zero-extend words 0-3 of V8HI operand 1
;; to four doublewords.
(define_insn "sse4_1_zero_extendv4hiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (zero_extend:V4SI
          (vec_select:V4HI
            (match_operand:V8HI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "pmovzxwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6350
;; pmovzxwd, narrow source: V4HI operand 1 (register or memory) is
;; wrapped in vec_duplicate:V8HI and elements 0-3 are zero-extended.
(define_insn "*sse4_1_zero_extendv4hiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (zero_extend:V4SI
          (vec_select:V4HI
            (vec_duplicate:V8HI
              (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "pmovzxwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6366
;; pmovzxwq, register source: zero-extend words 0-1 of V8HI operand 1
;; to two quadwords.
(define_insn "sse4_1_zero_extendv2hiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (zero_extend:V2DI
          (vec_select:V2HI
            (match_operand:V8HI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovzxwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6379
;; pmovzxwq, narrow source: V2HI operand 1 (register or memory) is
;; wrapped in vec_duplicate:V8HI and elements 0-1 are zero-extended.
(define_insn "*sse4_1_zero_extendv2hiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (zero_extend:V2DI
          (vec_select:V2HI
            (vec_duplicate:V8HI
              (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovzxwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6393
;; pmovzxdq, register source: zero-extend doublewords 0-1 of V4SI
;; operand 1 to two quadwords.
(define_insn "sse4_1_zero_extendv2siv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (zero_extend:V2DI
          (vec_select:V2SI
            (match_operand:V4SI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovzxdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6406
;; pmovzxdq, narrow source: V2SI operand 1 (register or memory) is
;; wrapped in vec_duplicate:V4SI and elements 0-1 are zero-extended.
(define_insn "*sse4_1_zero_extendv2siv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (zero_extend:V2DI
          (vec_select:V2SI
            (vec_duplicate:V4SI
              (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovzxdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6420
;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
;; But it is not really a compare instruction.
;; ptest: sets FLAGS_REG (CCmode) from two V2DI inputs, modelled as
;; UNSPEC_PTEST.  There is no register result.
(define_insn "sse4_1_ptest"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V2DI 0 "register_operand" "x")
           (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
          UNSPEC_PTEST))]
  "TARGET_SSE4_1"
  "ptest\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6433
;; roundpd (UNSPEC_ROUND): V2DF source in register or memory, with an
;; immediate operand 2 in 0..15.
(define_insn "sse4_1_roundpd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (unspec:V2DF
          [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
           (match_operand:SI 2 "const_0_to_15_operand" "n")]
          UNSPEC_ROUND))]
  "TARGET_SSE4_1"
  "roundpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V2DF")])
6444
;; roundps (UNSPEC_ROUND): V4SF source in register or memory, with an
;; immediate operand 2 in 0..15.
(define_insn "sse4_1_roundps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
           (match_operand:SI 2 "const_0_to_15_operand" "n")]
          UNSPEC_ROUND))]
  "TARGET_SSE4_1"
  "roundps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V4SF")])
6455
;; roundsd: element 0 of the result comes from the UNSPEC_ROUND of
;; operand 2 (vec_merge mask 1); the remaining element comes from
;; operand 1, which is tied to the destination.
(define_insn "sse4_1_roundsd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (unspec:V2DF
            [(match_operand:V2DF 2 "register_operand" "x")
             (match_operand:SI 3 "const_0_to_15_operand" "n")]
            UNSPEC_ROUND)
          (match_operand:V2DF 1 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE4_1"
  "roundsd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V2DF")])
6469
;; roundss: element 0 of the result comes from the UNSPEC_ROUND of
;; operand 2 (vec_merge mask 1); the remaining elements come from
;; operand 1, which is tied to the destination.
(define_insn "sse4_1_roundss"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (unspec:V4SF
            [(match_operand:V4SF 2 "register_operand" "x")
             (match_operand:SI 3 "const_0_to_15_operand" "n")]
            UNSPEC_ROUND)
          (match_operand:V4SF 1 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE4_1"
  "roundss\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V4SF")])
6483
6484 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6485 ;;
6486 ;; Intel SSE4.2 string/text processing instructions
6487 ;;
6488 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6489
;; Combined pcmpestr pattern, valid only before reload.  It describes
;; three results at once -- an SImode value (constraint "c"), a V16QI
;; value (constraint "Y0") and FLAGS_REG -- and is always split.  The
;; splitter looks at the REG_UNUSED notes on the insn and emits
;; pcmpestri and/or pcmpestrm for the results that are used, or the
;; flags-only pattern when only FLAGS_REG is used.
(define_insn_and_split "sse4_2_pcmpestr"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 2 "register_operand" "x,x")
           (match_operand:SI 3 "register_operand" "a,a")
           (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
           (match_operand:SI 5 "register_operand" "d,d")
           (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPESTR))
   (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
        (unspec:V16QI
          [(match_dup 2)
           (match_dup 3)
           (match_dup 4)
           (match_dup 5)
           (match_dup 6)]
          UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 2)
           (match_dup 3)
           (match_dup 4)
           (match_dup 5)
           (match_dup 6)]
          UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2
   && !(reload_completed || reload_in_progress)"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* A result is needed unless the insn carries a REG_UNUSED note
     for the register that holds it.  */
  int need_index
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
  int need_mask
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
  int need_flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);

  if (need_index)
    emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
                                     operands[3], operands[4],
                                     operands[5], operands[6]));
  if (need_mask)
    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
                                     operands[3], operands[4],
                                     operands[5], operands[6]));
  /* Only the flags are live: use the flags-only pattern.  */
  if (need_flags && !need_index && !need_mask)
    emit_insn (gen_sse4_2_pcmpestr_cconly (operands[2], operands[3],
                                           operands[4], operands[5],
                                           operands[6]));
  DONE;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
6544
;; pcmpestri: produces an SImode result in operand 0 (constraint "c")
;; and sets FLAGS_REG, both modelled as UNSPEC_PCMPESTR of the same
;; five inputs.  The two alternatives differ only in whether operand 3
;; is a register or memory.
(define_insn "sse4_2_pcmpestri"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:SI 2 "register_operand" "a,a")
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
           (match_operand:SI 4 "register_operand" "d,d")
           (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1)
           (match_dup 2)
           (match_dup 3)
           (match_dup 4)
           (match_dup 5)]
          UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2"
  "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
6569
;; Mask-producing form of the explicit-length string compare.
;;
;;   operand 0   V16QImode result, pinned by the "Y0" constraint
;;   operand 1   first V16QI source (SSE register)
;;   operand 2   SImode input pinned to the "a" register
;;   operand 3   second V16QI source (SSE register or memory)
;;   operand 4   SImode input pinned to the "d" register
;;   operand 5   control byte, a constant in 0..255
;;
;; As with the index form, the flags register is a second result of the
;; same unspec inputs.  Alternative 1 is the memory-source variant.
(define_insn "sse4_2_pcmpestrm"
  [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:SI 2 "register_operand" "a,a")
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
           (match_operand:SI 4 "register_operand" "d,d")
           (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1)
           (match_dup 2)
           (match_dup 3)
           (match_dup 4)
           (match_dup 5)]
          UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2"
  ;; Only the two vector sources and the control byte are printed; the
  ;; destination and the two SImode inputs are fixed by their constraints.
  "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
6594
;; Explicit-length string compare used only for its condition codes.
;;
;; The single SET targets the flags register; whichever data result the
;; chosen instruction also writes is modelled as a clobbered scratch:
;;
;;   alternatives 0,1   emit pcmpestrm, clobbering a "Y0" V16QI scratch
;;   alternatives 2,3   emit pcmpestri, clobbering an SImode "c" scratch
;;
;; In each alternative the other scratch is "X", i.e. unconstrained.
;; Odd-numbered alternatives take the second vector source from memory.
(define_insn "sse4_2_pcmpestr_cconly"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
           (match_operand:SI 1 "register_operand" "a,a,a,a")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m,x,m")
           (match_operand:SI 3 "register_operand" "d,d,d,d")
           (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
          UNSPEC_PCMPESTR))
   (clobber (match_scratch:V16QI 5 "=Y0,Y0,X,X"))
   (clobber (match_scratch:SI 6 "= X, X,c,c"))]
  "TARGET_SSE4_2"
  ;; One template line per constraint alternative, in the same order.
  "@
   pcmpestrm\t{%4, %2, %0|%0, %2, %4}
   pcmpestrm\t{%4, %2, %0|%0, %2, %4}
   pcmpestri\t{%4, %2, %0|%0, %2, %4}
   pcmpestri\t{%4, %2, %0|%0, %2, %4}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "memory" "none,load,none,load")
   (set_attr "mode" "TI")])
6617
;; Combined pattern for the implicit-length string compare.
;;
;; It describes all three results at once: the SImode value in the "c"
;; register (operand 0), the V16QImode value in the "Y0" register
;; (operand 1) and the flags.  It is only valid before reload and is
;; always split ("#" template, "&& 1" split condition).
;;
;; The splitter checks the REG_UNUSED notes on the insn and emits only
;; the instructions whose results are still wanted:
;;
;;   operand 0 used           -> sse4_2_pcmpistri
;;   operand 1 used           -> sse4_2_pcmpistrm
;;   only the flags used      -> sse4_2_pcmpistr_cconly
;;   nothing used             -> a NOTE_INSN_DELETED note
;;
;; Operands 2..4 are the two vector sources and the 0..255 control byte.
(define_insn_and_split "sse4_2_pcmpistr"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 2 "register_operand" "x,x")
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
           (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
        (unspec:V16QI
          [(match_dup 2)
           (match_dup 3)
           (match_dup 4)]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 2)
           (match_dup 3)
           (match_dup 4)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2
   && !(reload_completed || reload_in_progress)"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* A result is live unless the insn carries a REG_UNUSED note for it.  */
  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);

  if (ecx)
    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
                                     operands[3], operands[4]));
  if (xmm0)
    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
                                     operands[3], operands[4]));
  /* Both data-producing forms already set the flags, so the flags-only
     pattern is needed only when neither of them was emitted.  */
  if (flags && !(ecx || xmm0))
    emit_insn (gen_sse4_2_pcmpistr_cconly (operands[2], operands[3],
                                           operands[4]));
  /* With no live result nothing has been emitted so far.  An empty
     sequence is treated as a failed split, which would leave the "#"
     placeholder in the insn stream; emit a deleted-insn note so the
     split succeeds and the dead insn is dropped.  */
  if (!(flags || ecx || xmm0))
    emit_note (NOTE_INSN_DELETED);
  DONE;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
6663
;; Index-producing form of the implicit-length string compare.
;;
;;   operand 0   SImode result, pinned to the "c" register
;;   operand 1   first V16QI source (SSE register)
;;   operand 2   second V16QI source (SSE register or memory)
;;   operand 3   control byte, a constant in 0..255
;;
;; The flags register is a second result of the same three inputs.
;; Alternative 1 is the memory-source variant.
(define_insn "sse4_2_pcmpistri"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1)
           (match_dup 2)
           (match_dup 3)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  ;; The destination is not printed; its "c" constraint fixes it.
  "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
6684
;; Mask-producing form of the implicit-length string compare.
;;
;;   operand 0   V16QImode result, pinned by the "Y0" constraint
;;   operand 1   first V16QI source (SSE register)
;;   operand 2   second V16QI source (SSE register or memory)
;;   operand 3   control byte, a constant in 0..255
;;
;; The flags register is a second result of the same three inputs.
;; Alternative 1 is the memory-source variant.
(define_insn "sse4_2_pcmpistrm"
  [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1)
           (match_dup 2)
           (match_dup 3)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  ;; The destination is not printed; its "Y0" constraint fixes it.
  "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
6705
;; Implicit-length string compare used only for its condition codes.
;;
;; The single SET targets the flags register; the data result that the
;; chosen instruction also writes is modelled as a clobbered scratch:
;;
;;   alternatives 0,1   emit pcmpistrm, clobbering a "Y0" V16QI scratch
;;   alternatives 2,3   emit pcmpistri, clobbering an SImode "c" scratch
;;
;; In each alternative the other scratch is "X", i.e. unconstrained.
;; Odd-numbered alternatives take the second vector source from memory.
(define_insn "sse4_2_pcmpistr_cconly"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
           (match_operand:V16QI 1 "nonimmediate_operand" "x,m,x,m")
           (match_operand:SI 2 "const_0_to_255_operand" "n,n,n,n")]
          UNSPEC_PCMPISTR))
   (clobber (match_scratch:V16QI 3 "=Y0,Y0,X,X"))
   (clobber (match_scratch:SI 4 "= X, X,c,c"))]
  "TARGET_SSE4_2"
  ;; One template line per constraint alternative, in the same order.
  "@
   pcmpistrm\t{%2, %1, %0|%0, %1, %2}
   pcmpistrm\t{%2, %1, %0|%0, %1, %2}
   pcmpistri\t{%2, %1, %0|%0, %1, %2}
   pcmpistri\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "memory" "none,load,none,load")
   (set_attr "mode" "TI")])