re PR tree-optimization/24659 (Conversions are not vectorized)
[gcc.git] / gcc / config / i386 / sse.md
1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007
3 ;; Free Software Foundation, Inc.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
21
22
;; 16-byte integer vector modes handled by SSE.  TImode is deliberately
;; left out: it gets special-cased for TARGET_64BIT.
(define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])

;; Every 16-byte vector mode handled by SSE, integer and floating point.
(define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])

;; Mix-n-match subsets of the integer vector modes.  The digits in each
;; name list the element sizes in bytes of the modes it contains
;; (1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI).
(define_mode_macro SSEMODE12 [V16QI V8HI])
(define_mode_macro SSEMODE24 [V8HI V4SI])
(define_mode_macro SSEMODE14 [V16QI V4SI])
(define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
(define_mode_macro SSEMODE248 [V8HI V4SI V2DI])

;; Integer vector mode -> mnemonic suffix (b, w, d or q).
(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])

;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
39
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
41
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
43 ;;
44 ;; Move patterns
45 ;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
47
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
50
;; Move expander for the integer vector modes.  The whole expansion is
;; delegated to ix86_expand_vector_move; because of the unconditional DONE
;; the RTL template below is never emitted as written.
(define_expand "mov<mode>"
  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
        (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})
59
;; Integer vector move insn; at least one side must be a register.
;;   alternative 0: xmm <- constant accepted by the "C" constraint
;;   alternative 1: xmm <- xmm or memory
;;   alternative 2: memory <- xmm
;; The "mode" attribute picks the mnemonic for alternatives 1 and 2:
;; V4SF (movaps) when optimizing for size, when SSE2 is not available, or
;; for stores when TARGET_SSE_TYPELESS_STORES is set; TI (movdqa) otherwise.
(define_insn "*mov<mode>_internal"
  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
        (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
  "TARGET_SSE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      /* Constant load: the opcode depends on the constant.  */
      return standard_sse_constant_opcode (insn, operands[1]);

    case 1:
    case 2:
      return (get_attr_mode (insn) == MODE_V4SF
              ? "movaps\t{%1, %0|%0, %1}"
              : "movdqa\t{%1, %0|%0, %1}");

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set (attr "mode")
     (if_then_else
       (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
                 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
            (and (eq_attr "alternative" "2")
                 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
                     (const_int 0))))
       (const_string "V4SF")
       (const_string "TI")))])
91
;; Move a DI value from a 32-bit register pair (e.g. %edx:%eax) into an xmm
;; register.  We would rather avoid this entirely: if the register pair was
;; itself loaded from memory, loading that memory straight into the xmm
;; register is preferable.  To give that a chance to happen, this pattern
;; is not split until after register allocation.  When the 64-bit value did
;; not come from memory this is the best we can do, and it is still much
;; better than bouncing %edx:%eax through a stack temporary.
;;
;; Operand 2 is an xmm scratch, needed only by the register alternative.

(define_insn_and_split "movdi_to_sse"
  [(parallel
    [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
          (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
     (clobber (match_scratch:V4SI 2 "=&x,X"))])]
  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[1], DImode))
    {
      /* The DImode value arrived in a pair of integer registers
         (e.g. %edx:%eax).  Load the SImode piece at byte offset 0 into
         the destination and the piece at byte offset 4 into the scratch,
         then interleave the two to assemble the 64-bit value.  */
      rtx piece0 = gen_rtx_SUBREG (SImode, operands[1], 0);
      rtx piece4 = gen_rtx_SUBREG (SImode, operands[1], 4);

      emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
                                  piece0));
      emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
                                  piece4));
      emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
    }
  else if (memory_operand (operands[1], DImode))
    {
      /* Memory source: concatenate the DImode value with zero, viewing
         the destination as V2DI.  */
      emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
                                     operands[1], const0_rtx));
    }
  else
    gcc_unreachable ();
})
126
;; V4SF move expander; everything is handed to ix86_expand_vector_move.
(define_expand "movv4sf"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (match_operand:V4SF 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (V4SFmode, operands);
  DONE;
})
135
;; V4SF move insn; at least one side must be a register.
;;   alternative 0: xmm <- constant accepted by the "C" constraint
;;   alternatives 1 and 2: register/memory moves, always emitted as movaps
(define_insn "*movv4sf_internal"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
        (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
  "TARGET_SSE
   && (register_operand (operands[0], V4SFmode)
       || register_operand (operands[1], V4SFmode))"
{
  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);

    case 1:
    case 2:
      return "movaps\t{%1, %0|%0, %1}";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set_attr "mode" "V4SF")])
156
;; After reload, rewrite a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand: operand 1 is replaced by its SFmode
;; subreg at offset 0, which is duplicated and merged (mask 1, i.e. element
;; 0 only) into an all-zero V4SF vector.
(define_split
  [(set (match_operand:V4SF 0 "register_operand" "")
        (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0)
        (vec_merge:V4SF
          (vec_duplicate:V4SF (match_dup 1))
          (match_dup 2)
          (const_int 1)))]
{
  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
  operands[2] = CONST0_RTX (V4SFmode);
})
170
;; V2DF move expander; everything is handed to ix86_expand_vector_move.
;; Note the condition is TARGET_SSE, not TARGET_SSE2.
(define_expand "movv2df"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
        (match_operand:V2DF 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (V2DFmode, operands);
  DONE;
})
179
;; V2DF move insn; at least one side must be a register.
;;   alternative 0: xmm <- constant accepted by the "C" constraint
;;   alternative 1: xmm <- xmm or memory
;;   alternative 2: memory <- xmm
;; The "mode" attribute picks the mnemonic for alternatives 1 and 2:
;; V4SF (movaps) when optimizing for size, when SSE2 is not available, or
;; for stores when TARGET_SSE_TYPELESS_STORES is set; V2DF (movapd)
;; otherwise.
(define_insn "*movv2df_internal"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
        (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
  "TARGET_SSE
   && (register_operand (operands[0], V2DFmode)
       || register_operand (operands[1], V2DFmode))"
{
  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);

    case 1:
    case 2:
      return (get_attr_mode (insn) == MODE_V4SF
              ? "movaps\t{%1, %0|%0, %1}"
              : "movapd\t{%1, %0|%0, %1}");

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set (attr "mode")
     (if_then_else
       (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
                 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
            (and (eq_attr "alternative" "2")
                 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
                     (const_int 0))))
       (const_string "V4SF")
       (const_string "V2DF")))])
211
;; After reload, rewrite a V2DF register load whose source satisfies
;; zero_extended_scalar_load_operand as the concatenation of the DFmode
;; subreg at offset 0 of the source with a DFmode zero.
(define_split
  [(set (match_operand:V2DF 0 "register_operand" "")
        (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0)
        (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
{
  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
  operands[2] = CONST0_RTX (DFmode);
})
221
;; Push expander for every SSE vector mode; delegated to ix86_expand_push.
(define_expand "push<mode>1"
  [(match_operand:SSEMODE 0 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_push (<MODE>mode, operands[0]);
  DONE;
})
229
;; Misaligned move expander for every SSE vector mode; delegated to
;; ix86_expand_vector_move_misalign.
(define_expand "movmisalign<mode>"
  [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
        (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move_misalign (<MODE>mode, operands);
  DONE;
})
238
;; V4SF move wrapped in UNSPEC_MOVU, emitted as movups.  Either operand may
;; be memory, but not both.
;;
;; The "mode" attribute is V4SF so that it agrees with the V4SF operands and
;; the packed-single mnemonic, as on the other V4SF patterns in this file.
;; It previously read "V2DF", which described a double-precision insn.
;; NOTE(review): anything derived from "mode" (e.g. length or scheduling
;; attributes defined in i386.md) now sees V4SF -- confirm nothing relied on
;; the old value.
(define_insn "sse_movups"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
        (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
                     UNSPEC_MOVU))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movups\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF")])
247
;; V2DF move wrapped in UNSPEC_MOVU, emitted as movupd.  Either operand may
;; be memory, but not both.
(define_insn "sse2_movupd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
        (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
                     UNSPEC_MOVU))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movupd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2DF")])
256
;; V16QI move wrapped in UNSPEC_MOVU, emitted as movdqu.  Either operand may
;; be memory, but not both.
(define_insn "sse2_movdqu"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
        (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
                      UNSPEC_MOVU))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movdqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
266
;; V4SF store of an xmm register to memory wrapped in UNSPEC_MOVNT,
;; emitted as movntps.
(define_insn "sse_movntv4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "=m")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE"
  "movntps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF")])
275
;; V2DF store of an xmm register to memory wrapped in UNSPEC_MOVNT,
;; emitted as movntpd.
(define_insn "sse2_movntv2df"
  [(set (match_operand:V2DF 0 "memory_operand" "=m")
        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movntpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])
284
;; V2DI store of an xmm register to memory wrapped in UNSPEC_MOVNT,
;; emitted as movntdq.
(define_insn "sse2_movntv2di"
  [(set (match_operand:V2DI 0 "memory_operand" "=m")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movntdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
294
;; SImode store of a general register ("r") to memory wrapped in
;; UNSPEC_MOVNT, emitted as movnti.
;; NOTE(review): the "mode" attribute is V2DF although both operands are
;; SImode; check against i386.md whether that skews any mode-derived
;; attribute for this insn.
(define_insn "sse2_movntsi"
  [(set (match_operand:SI 0 "memory_operand" "=m")
        (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movnti\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])
303
;; V16QI load from memory into an xmm register wrapped in UNSPEC_LDDQU,
;; emitted as lddqu.  The source must be memory.
(define_insn "sse3_lddqu"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
                      UNSPEC_LDDQU))]
  "TARGET_SSE3"
  "lddqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "TI")])
313
; Expand patterns for non-temporal stores.  At the moment, only those
; that map directly onto a single insn are defined; patterns for other
; modes, expanding to several insns, would be possible.

;; V4SF non-temporal store: no preparation code, so the UNSPEC_MOVNT
;; template is emitted as written.
(define_expand "storentv4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "=m")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE"
  "")
324
;; V2DF non-temporal store: no preparation code, so the UNSPEC_MOVNT
;; template is emitted as written.
(define_expand "storentv2df"
  [(set (match_operand:V2DF 0 "memory_operand" "=m")
        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "")
331
;; V2DI non-temporal store: no preparation code, so the UNSPEC_MOVNT
;; template is emitted as written.
(define_expand "storentv2di"
  [(set (match_operand:V2DI 0 "memory_operand" "=m")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "")
338
;; SImode non-temporal store: no preparation code, so the UNSPEC_MOVNT
;; template is emitted as written.
(define_expand "storentsi"
  [(set (match_operand:SI 0 "memory_operand" "=m")
        (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "")
345
346 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
347 ;;
348 ;; Parallel single-precision floating point arithmetic
349 ;;
350 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
351
;; V4SF negation; expanded entirely by ix86_expand_fp_absneg_operator.
(define_expand "negv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands);
  DONE;
})
357
;; V4SF absolute value; expanded entirely by ix86_expand_fp_absneg_operator.
(define_expand "absv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands);
  DONE;
})
363
;; V4SF addition.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy, then the template is emitted.
(define_expand "addv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);
})
370
;; addps.  Operands 1 and 2 are commutative ("%"); operand 1 is tied to the
;; destination and operand 2 may be a register or memory.
(define_insn "*addv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
  "addps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])
379
;; addss.  Only element 0 (vec_merge mask 1) receives the sum; the other
;; elements are taken from operand 1, which is tied to the destination.
(define_insn "sse_vmaddv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
  "addss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "SF")])
391
;; V4SF subtraction.  Operand 1 must already be a register; the operands
;; are adjusted by ix86_fixup_binary_operands_no_copy before the template
;; is emitted.
(define_expand "subv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
                    (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);
})
398
;; subps.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.
(define_insn "*subv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "subps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])
407
;; subss.  Only element 0 (vec_merge mask 1) receives the difference; the
;; other elements are taken from operand 1, which is tied to the destination.
(define_insn "sse_vmsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "subss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "SF")])
419
;; V4SF multiplication.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy, then the template is emitted.
(define_expand "mulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);
})
426
;; mulps.  Operands 1 and 2 are commutative ("%"); operand 1 is tied to the
;; destination and operand 2 may be a register or memory.
(define_insn "*mulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
  "mulps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "V4SF")])
435
;; mulss.  Only element 0 (vec_merge mask 1) receives the product; the
;; other elements are taken from operand 1, which is tied to the destination.
(define_insn "sse_vmmulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
  "mulss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "SF")])
447
;; V4SF division.  After the usual operand fixup, the division is replaced
;; by the sequence from ix86_emit_swdivsf when all of these hold:
;; TARGET_SSE_MATH, TARGET_RECIP, not optimizing for size, finite-math-only,
;; no trapping math, and unsafe math optimizations enabled.  Otherwise the
;; div template is emitted as written.
(define_expand "divv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (div:V4SF (match_operand:V4SF 1 "register_operand" "")
                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);

  if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swdivsf (operands[0], operands[1], operands[2], V4SFmode);
      DONE;
    }
})
465
;; divps.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.
(define_insn "*divv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "divps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "V4SF")])
474
;; divss.  Only element 0 (vec_merge mask 1) receives the quotient; the
;; other elements are taken from operand 1, which is tied to the destination.
(define_insn "sse_vmdivv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "divss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "SF")])
486
;; rcpps, modelled as UNSPEC_RCP of operand 1 (register or memory).
(define_insn "sse_rcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
          UNSPEC_RCP))]
  "TARGET_SSE"
  "rcpps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
495
;; rcpss.  Element 0 (vec_merge mask 1) is the UNSPEC_RCP of operand 1; the
;; other elements come from operand 2, which is tied to the destination.
(define_insn "sse_vmrcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_RCP)
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "rcpss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])
507
;; rsqrtps, modelled as UNSPEC_RSQRT of operand 1 (register or memory).
;; This is the insn matched when the sse_rsqrtv4sf2 expander emits its
;; template unchanged.
(define_insn "*sse_rsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
          UNSPEC_RSQRT))]
  "TARGET_SSE"
  "rsqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
516
;; V4SF reciprocal square root.  When TARGET_SSE_MATH, TARGET_RECIP,
;; not optimizing for size, finite-math-only, no trapping math and unsafe
;; math optimizations all hold, the sequence from ix86_emit_swsqrtsf is used
;; (last argument 1); otherwise the UNSPEC_RSQRT template is emitted.
(define_expand "sse_rsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (unspec:V4SF
          [(match_operand:V4SF 1 "nonimmediate_operand" "")]
          UNSPEC_RSQRT))]
  "TARGET_SSE"
{
  if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
      DONE;
    }
})
531
;; rsqrtss.  Element 0 (vec_merge mask 1) is the UNSPEC_RSQRT of operand 1;
;; the other elements come from operand 2, which is tied to the destination.
(define_insn "sse_vmrsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_RSQRT)
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "rsqrtss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])
543
;; sqrtps on operand 1 (register or memory).  This is the insn matched when
;; the sqrtv4sf2 expander emits its template unchanged.
(define_insn "*sqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "sqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
551
;; V4SF square root.  When TARGET_SSE_MATH, TARGET_RECIP, not optimizing
;; for size, finite-math-only, no trapping math and unsafe math
;; optimizations all hold, the sequence from ix86_emit_swsqrtsf is used
;; (last argument 0); otherwise the sqrt template is emitted as written.
;;
;; Operand 0 carries an empty constraint string like the other arithmetic
;; expanders in this file; it previously held a stray "=".
(define_expand "sqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
      DONE;
    }
})
565
;; sqrtss.  Element 0 (vec_merge mask 1) is the square root of operand 1;
;; the other elements come from operand 2, which is tied to the destination.
(define_insn "sse_vmsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "sqrtss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])
576
577 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
578 ;; isn't really correct, as those rtl operators aren't defined when
579 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
580
;; V4SF signed maximum.  Unless -ffinite-math-only is in effect, operand 1
;; is forced into a register before the usual binary-operand fixup, which
;; is what the non-commutative "*smaxv4sf3" insn requires.
(define_expand "smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (V4SFmode, operands[1]);

  ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
})
591
;; maxps, finite-math variant: only available under flag_finite_math_only,
;; where operands 1 and 2 are treated as commutative ("%").
(define_insn "*smaxv4sf3_finite"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && flag_finite_math_only
   && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
601
;; maxps, general variant: operand 1 must be a register tied to the
;; destination and the operands are not marked commutative.
(define_insn "*smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
610
;; maxss.  Only element 0 (vec_merge mask 1) receives the maximum; the
;; other elements are taken from operand 1, which is tied to the destination.
(define_insn "sse_vmsmaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "maxss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])
622
;; V4SF signed minimum.  Unless -ffinite-math-only is in effect, operand 1
;; is forced into a register before the usual binary-operand fixup, which
;; is what the non-commutative "*sminv4sf3" insn requires.
(define_expand "sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (V4SFmode, operands[1]);

  ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
})
633
;; minps, finite-math variant: only available under flag_finite_math_only,
;; where operands 1 and 2 are treated as commutative ("%").
(define_insn "*sminv4sf3_finite"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && flag_finite_math_only
   && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
643
;; minps, general variant: operand 1 must be a register tied to the
;; destination and the operands are not marked commutative.
(define_insn "*sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
652
;; minss.  Only element 0 (vec_merge mask 1) receives the minimum; the
;; other elements are taken from operand 1, which is tied to the destination.
(define_insn "sse_vmsminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "minss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])
664
665 ;; These versions of the min/max patterns implement exactly the operations
666 ;; min = (op1 < op2 ? op1 : op2)
667 ;; max = (!(op1 < op2) ? op1 : op2)
668 ;; Their operands are not commutative, and thus they may be used in the
669 ;; presence of -0.0 and NaN.
670
;; minps expressed as UNSPEC_IEEE_MIN rather than smin, so operand order is
;; preserved: operand 1 is tied to the destination, operand 2 is reg/mem.
(define_insn "*ieee_sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
                     UNSPEC_IEEE_MIN))]
  "TARGET_SSE"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])
680
;; maxps expressed as UNSPEC_IEEE_MAX rather than smax, so operand order is
;; preserved: operand 1 is tied to the destination, operand 2 is reg/mem.
(define_insn "*ieee_smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
                     UNSPEC_IEEE_MAX))]
  "TARGET_SSE"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])
690
;; minpd expressed as UNSPEC_IEEE_MIN rather than smin, so operand order is
;; preserved: operand 1 is tied to the destination, operand 2 is reg/mem.
(define_insn "*ieee_sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
                      (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
                     UNSPEC_IEEE_MIN))]
  "TARGET_SSE2"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
700
;; maxpd expressed as UNSPEC_IEEE_MAX rather than smax, so operand order is
;; preserved: operand 1 is tied to the destination, operand 2 is reg/mem.
(define_insn "*ieee_smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
                      (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
                     UNSPEC_IEEE_MAX))]
  "TARGET_SSE2"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
710
;; addsubps: subtract in elements 0 and 2, add in elements 1 and 3.
;;
;; In a vec_merge, a set mask bit selects the element from the first
;; vector (here the plus) and a clear bit selects it from the second (the
;; minus).  The sums therefore need mask bits 1 and 3, i.e. 0b1010 = 10.
;; The mask was previously 5, which described the opposite lane assignment
;; and so did not match what the emitted instruction computes.
;;
;; Operand 1 is tied to the destination; operand 2 may be register or memory.
(define_insn "sse3_addsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (plus:V4SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (minus:V4SF (match_dup 1) (match_dup 2))
          (const_int 10)))]
  "TARGET_SSE3"
  "addsubps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "V4SF")])
724
;; haddps.  With a = operand 1 (tied to the destination) and b = operand 2,
;; the result is { a[0]+a[1], a[2]+a[3], b[0]+b[1], b[2]+b[3] }.
(define_insn "sse3_haddv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          ;; Low half: pairwise sums of operand 1.
          (vec_concat:V2SF
            (plus:SF
              (vec_select:SF
                (match_operand:V4SF 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
            (plus:SF
              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
          ;; High half: pairwise sums of operand 2.
          (vec_concat:V2SF
            (plus:SF
              (vec_select:SF
                (match_operand:V4SF 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
            (plus:SF
              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSE3"
  "haddps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "V4SF")])
751
;; hsubps.  With a = operand 1 (tied to the destination) and b = operand 2,
;; the result is { a[0]-a[1], a[2]-a[3], b[0]-b[1], b[2]-b[3] }.
(define_insn "sse3_hsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          ;; Low half: pairwise differences of operand 1.
          (vec_concat:V2SF
            (minus:SF
              (vec_select:SF
                (match_operand:V4SF 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
            (minus:SF
              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
          ;; High half: pairwise differences of operand 2.
          (vec_concat:V2SF
            (minus:SF
              (vec_select:SF
                (match_operand:V4SF 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
            (minus:SF
              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSE3"
  "hsubps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "V4SF")])
778
;; Sum reduction of a V4SF vector.  Without SSE3 the work is done by
;; ix86_expand_reduc_v4sf using gen_addv4sf3; with SSE3 two haddps steps
;; through a fresh temporary are emitted instead.
(define_expand "reduc_splus_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  if (!TARGET_SSE3)
    ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
  else
    {
      rtx partial = gen_reg_rtx (V4SFmode);

      emit_insn (gen_sse3_haddv4sf3 (partial, operands[1], operands[1]));
      emit_insn (gen_sse3_haddv4sf3 (operands[0], partial, partial));
    }
  DONE;
})
794
;; Maximum reduction of a V4SF vector, via ix86_expand_reduc_v4sf with
;; gen_smaxv4sf3 as the combining operation.
(define_expand "reduc_smax_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
  DONE;
})
803
;; Minimum reduction of a V4SF vector, via ix86_expand_reduc_v4sf with
;; gen_sminv4sf3 as the combining operation.
(define_expand "reduc_smin_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
  DONE;
})
812
813 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
814 ;;
815 ;; Parallel single-precision floating point comparisons
816 ;;
817 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
818
;; cmp<cond>ps.  Operand 3 is the comparison operator (any code accepted by
;; sse_comparison_operator); %D3 prints its mnemonic infix.  Operand 1 is
;; tied to the destination.
(define_insn "sse_maskcmpv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (match_operator:V4SF 3 "sse_comparison_operator"
          [(match_operand:V4SF 1 "register_operand" "0")
           (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
  "TARGET_SSE"
  "cmp%D3ps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "V4SF")])
828
;; cmp<cond>ss on SFmode operands.  Operand 3 is the comparison operator;
;; %D3 prints its mnemonic infix.  Operand 1 is tied to the destination.
(define_insn "sse_maskcmpsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (match_operator:SF 3 "sse_comparison_operator"
          [(match_operand:SF 1 "register_operand" "0")
           (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
  "TARGET_SSE"
  "cmp%D3ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "SF")])
838
;; cmp<cond>ss on V4SF operands.  Only element 0 (vec_merge mask 1) takes
;; the comparison result; the other elements are taken from operand 1,
;; which is tied to the destination.  Operand 2 must be a register here.
(define_insn "sse_vmmaskcmpv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (match_operator:V4SF 3 "sse_comparison_operator"
            [(match_operand:V4SF 1 "register_operand" "0")
             (match_operand:V4SF 2 "register_operand" "x")])
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "cmp%D3ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "SF")])
851
;; comiss.  Compares element 0 of operand 0 with element 0 of operand 1 and
;; sets FLAGS_REG in CCFPmode.
(define_insn "sse_comi"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (vec_select:SF
            (match_operand:V4SF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "comiss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "SF")])
865
;; ucomiss.  Compares element 0 of operand 0 with element 0 of operand 1
;; and sets FLAGS_REG in CCFPUmode.
(define_insn "sse_ucomi"
  [(set (reg:CCFPU FLAGS_REG)
        (compare:CCFPU
          (vec_select:SF
            (match_operand:V4SF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "ucomiss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "SF")])
879
;; V4SF vector conditional select: operand 0 = (op4 <cmp3> op5) ? op1 : op2.
;; The expansion is done by ix86_expand_fp_vcond; the expander fails when
;; that helper returns zero.
(define_expand "vcondv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (if_then_else:V4SF
          (match_operator 3 ""
            [(match_operand:V4SF 4 "nonimmediate_operand" "")
             (match_operand:V4SF 5 "nonimmediate_operand" "")])
          (match_operand:V4SF 1 "general_operand" "")
          (match_operand:V4SF 2 "general_operand" "")))]
  "TARGET_SSE"
{
  if (!ix86_expand_fp_vcond (operands))
    FAIL;
  DONE;
})
895
896 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
897 ;;
898 ;; Parallel single-precision floating point logical operations
899 ;;
900 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
901
;; Bitwise AND on V4SF.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy, then the template is emitted.
(define_expand "andv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);
})
908
;; andps.  Operands 1 and 2 are commutative ("%"); operand 1 is tied to the
;; destination and operand 2 may be a register or memory.
(define_insn "*andv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
  "andps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
917
;; andnps: (NOT operand 1) AND operand 2.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
(define_insn "sse_nandv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "andnps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
926
;; Bitwise inclusive OR on V4SF.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy, then the template is emitted.
(define_expand "iorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);
})
933
;; Matcher for V4SF bitwise OR, emitted as orps.  Operand 1 is tied to the
;; destination and is commutative with operand 2 ("%0").
934 (define_insn "*iorv4sf3"
935 [(set (match_operand:V4SF 0 "register_operand" "=x")
936 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
937 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
938 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
939 "orps\t{%2, %0|%0, %2}"
940 [(set_attr "type" "sselog")
941 (set_attr "mode" "V4SF")])
942
;; Named expander for the bitwise exclusive OR of two V4SF vectors.  The
;; operands are passed through ix86_fixup_binary_operands_no_copy before
;; the (xor:V4SF ...) template is emitted.
943 (define_expand "xorv4sf3"
944 [(set (match_operand:V4SF 0 "register_operand" "")
945 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
946 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
947 "TARGET_SSE"
948 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
949
;; Matcher for V4SF bitwise XOR, emitted as xorps.  Operand 1 is tied to
;; the destination and is commutative with operand 2 ("%0").
950 (define_insn "*xorv4sf3"
951 [(set (match_operand:V4SF 0 "register_operand" "=x")
952 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
953 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
954 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
955 "xorps\t{%2, %0|%0, %2}"
956 [(set_attr "type" "sselog")
957 (set_attr "mode" "V4SF")])
958
959 ;; Also define scalar versions. These are used for abs, neg, and
960 ;; conditional move. Using subregs into vector modes causes register
961 ;; allocation lossage. These patterns do not allow memory operands
962 ;; because the native instructions read the full 128-bits.
963
;; Scalar SF bitwise AND carried out with the full-width andps instruction
;; (hence mode attribute V4SF).  Both inputs must be SSE registers.
964 (define_insn "*andsf3"
965 [(set (match_operand:SF 0 "register_operand" "=x")
966 (and:SF (match_operand:SF 1 "register_operand" "0")
967 (match_operand:SF 2 "register_operand" "x")))]
968 "TARGET_SSE"
969 "andps\t{%2, %0|%0, %2}"
970 [(set_attr "type" "sselog")
971 (set_attr "mode" "V4SF")])
972
;; Scalar SF AND-NOT: operand 0 = (~operand 1) & operand 2, carried out
;; with the full-width andnps instruction.  Register operands only.
973 (define_insn "*nandsf3"
974 [(set (match_operand:SF 0 "register_operand" "=x")
975 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
976 (match_operand:SF 2 "register_operand" "x")))]
977 "TARGET_SSE"
978 "andnps\t{%2, %0|%0, %2}"
979 [(set_attr "type" "sselog")
980 (set_attr "mode" "V4SF")])
981
;; Scalar SF bitwise OR carried out with the full-width orps instruction.
;; Register operands only.
982 (define_insn "*iorsf3"
983 [(set (match_operand:SF 0 "register_operand" "=x")
984 (ior:SF (match_operand:SF 1 "register_operand" "0")
985 (match_operand:SF 2 "register_operand" "x")))]
986 "TARGET_SSE"
987 "orps\t{%2, %0|%0, %2}"
988 [(set_attr "type" "sselog")
989 (set_attr "mode" "V4SF")])
990
;; Scalar SF bitwise XOR carried out with the full-width xorps instruction.
;; Register operands only.
991 (define_insn "*xorsf3"
992 [(set (match_operand:SF 0 "register_operand" "=x")
993 (xor:SF (match_operand:SF 1 "register_operand" "0")
994 (match_operand:SF 2 "register_operand" "x")))]
995 "TARGET_SSE"
996 "xorps\t{%2, %0|%0, %2}"
997 [(set_attr "type" "sselog")
998 (set_attr "mode" "V4SF")])
999
1000 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1001 ;;
1002 ;; Parallel single-precision floating point conversion operations
1003 ;;
1004 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1005
;; cvtpi2ps: convert the two SImode elements of operand 2 (MMX register or
;; memory) to float and merge them into elements 0 and 1 of operand 1
;; (vec_merge mask 3); elements 2 and 3 are taken unchanged from operand 1,
;; which is tied to the destination.
1006 (define_insn "sse_cvtpi2ps"
1007 [(set (match_operand:V4SF 0 "register_operand" "=x")
1008 (vec_merge:V4SF
1009 (vec_duplicate:V4SF
1010 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1011 (match_operand:V4SF 1 "register_operand" "0")
1012 (const_int 3)))]
1013 "TARGET_SSE"
1014 "cvtpi2ps\t{%2, %0|%0, %2}"
1015 [(set_attr "type" "ssecvt")
1016 (set_attr "mode" "V4SF")])
1017
;; cvtps2pi: convert elements 0 and 1 of the V4SF source to integers and
;; place them in an MMX register.  The conversion is modelled with
;; UNSPEC_FIX_NOTRUNC rather than the truncating "fix" RTL code.
1018 (define_insn "sse_cvtps2pi"
1019 [(set (match_operand:V2SI 0 "register_operand" "=y")
1020 (vec_select:V2SI
1021 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1022 UNSPEC_FIX_NOTRUNC)
1023 (parallel [(const_int 0) (const_int 1)])))]
1024 "TARGET_SSE"
1025 "cvtps2pi\t{%1, %0|%0, %1}"
1026 [(set_attr "type" "ssecvt")
1027 (set_attr "unit" "mmx")
1028 (set_attr "mode" "DI")])
1029
;; cvttps2pi: truncating ("fix") conversion of elements 0 and 1 of the V4SF
;; source to integers in an MMX register.
;; NOTE(review): the mode attribute here is "SF" whereas the non-truncating
;; sse_cvtps2pi pattern uses "DI" -- confirm whether the difference is
;; intentional.
1030 (define_insn "sse_cvttps2pi"
1031 [(set (match_operand:V2SI 0 "register_operand" "=y")
1032 (vec_select:V2SI
1033 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1034 (parallel [(const_int 0) (const_int 1)])))]
1035 "TARGET_SSE"
1036 "cvttps2pi\t{%1, %0|%0, %1}"
1037 [(set_attr "type" "ssecvt")
1038 (set_attr "unit" "mmx")
1039 (set_attr "mode" "SF")])
1040
;; cvtsi2ss: convert the SImode operand 2 to float and merge it into
;; element 0 of operand 1 (vec_merge mask 1); the other elements come from
;; operand 1, which is tied to the destination.  Alternative 0 takes the
;; source from a general register, alternative 1 from memory; the decode
;; attributes are given per alternative in that order.
1041 (define_insn "sse_cvtsi2ss"
1042 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1043 (vec_merge:V4SF
1044 (vec_duplicate:V4SF
1045 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1046 (match_operand:V4SF 1 "register_operand" "0,0")
1047 (const_int 1)))]
1048 "TARGET_SSE"
1049 "cvtsi2ss\t{%2, %0|%0, %2}"
1050 [(set_attr "type" "sseicvt")
1051 (set_attr "athlon_decode" "vector,double")
1052 (set_attr "amdfam10_decode" "vector,double")
1053 (set_attr "mode" "SF")])
1054
;; cvtsi2ssq (64-bit only): convert the DImode operand 2 to float and merge
;; it into element 0 of operand 1 (vec_merge mask 1); the other elements
;; come from operand 1, which is tied to the destination.
;; Alternative 0 is the register source, alternative 1 the memory source.
;; The alternatives are kept disjoint ("r,m", as in sse_cvtsi2ss) so that
;; the per-alternative decode attributes below always describe the operand
;; kind that was actually selected.
1055 (define_insn "sse_cvtsi2ssq"
1056 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1057 (vec_merge:V4SF
1058 (vec_duplicate:V4SF
1059 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1060 (match_operand:V4SF 1 "register_operand" "0,0")
1061 (const_int 1)))]
1062 "TARGET_SSE && TARGET_64BIT"
1063 "cvtsi2ssq\t{%2, %0|%0, %2}"
1064 [(set_attr "type" "sseicvt")
1065 (set_attr "athlon_decode" "vector,double")
1066 (set_attr "amdfam10_decode" "vector,double")
1067 (set_attr "mode" "SF")])
1068
;; cvtss2si: convert element 0 of the V4SF operand 1 (SSE register or
;; memory) to an SImode integer using UNSPEC_FIX_NOTRUNC, i.e. not the
;; truncating "fix" conversion.
;; The amdfam10_decode attribute mirrors sse_cvtss2si_2, which emits the
;; same cvtss2si instruction for the same register/memory alternatives.
1069 (define_insn "sse_cvtss2si"
1070 [(set (match_operand:SI 0 "register_operand" "=r,r")
1071 (unspec:SI
1072 [(vec_select:SF
1073 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1074 (parallel [(const_int 0)]))]
1075 UNSPEC_FIX_NOTRUNC))]
1076 "TARGET_SSE"
1077 "cvtss2si\t{%1, %0|%0, %1}"
1078 [(set_attr "type" "sseicvt")
1079 (set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
1080 (set_attr "prefix_rep" "1")
1081 (set_attr "mode" "SI")])
1082
;; Variant of the cvtss2si pattern whose source is a plain SFmode operand
;; (SSE register or memory) instead of element 0 of a V4SF vector.  The
;; conversion is again modelled with UNSPEC_FIX_NOTRUNC.
1083 (define_insn "sse_cvtss2si_2"
1084 [(set (match_operand:SI 0 "register_operand" "=r,r")
1085 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1086 UNSPEC_FIX_NOTRUNC))]
1087 "TARGET_SSE"
1088 "cvtss2si\t{%1, %0|%0, %1}"
1089 [(set_attr "type" "sseicvt")
1090 (set_attr "athlon_decode" "double,vector")
1091 (set_attr "amdfam10_decode" "double,double")
1092 (set_attr "prefix_rep" "1")
1093 (set_attr "mode" "SI")])
1094
;; cvtss2siq (64-bit only): convert element 0 of the V4SF operand 1 (SSE
;; register or memory) to a DImode integer using UNSPEC_FIX_NOTRUNC.
;; The amdfam10_decode attribute mirrors sse_cvtss2siq_2, which emits the
;; same cvtss2siq instruction for the same register/memory alternatives.
1095 (define_insn "sse_cvtss2siq"
1096 [(set (match_operand:DI 0 "register_operand" "=r,r")
1097 (unspec:DI
1098 [(vec_select:SF
1099 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1100 (parallel [(const_int 0)]))]
1101 UNSPEC_FIX_NOTRUNC))]
1102 "TARGET_SSE && TARGET_64BIT"
1103 "cvtss2siq\t{%1, %0|%0, %1}"
1104 [(set_attr "type" "sseicvt")
1105 (set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
1106 (set_attr "prefix_rep" "1")
1107 (set_attr "mode" "DI")])
1108
;; Variant of the cvtss2siq pattern (64-bit only) whose source is a plain
;; SFmode operand instead of element 0 of a V4SF vector.
1109 (define_insn "sse_cvtss2siq_2"
1110 [(set (match_operand:DI 0 "register_operand" "=r,r")
1111 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1112 UNSPEC_FIX_NOTRUNC))]
1113 "TARGET_SSE && TARGET_64BIT"
1114 "cvtss2siq\t{%1, %0|%0, %1}"
1115 [(set_attr "type" "sseicvt")
1116 (set_attr "athlon_decode" "double,vector")
1117 (set_attr "amdfam10_decode" "double,double")
1118 (set_attr "prefix_rep" "1")
1119 (set_attr "mode" "DI")])
1120
;; cvttss2si: truncating ("fix") conversion of element 0 of the V4SF
;; operand 1 (SSE register or memory) to an SImode integer.
1121 (define_insn "sse_cvttss2si"
1122 [(set (match_operand:SI 0 "register_operand" "=r,r")
1123 (fix:SI
1124 (vec_select:SF
1125 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1126 (parallel [(const_int 0)]))))]
1127 "TARGET_SSE"
1128 "cvttss2si\t{%1, %0|%0, %1}"
1129 [(set_attr "type" "sseicvt")
1130 (set_attr "athlon_decode" "double,vector")
1131 (set_attr "amdfam10_decode" "double,double")
1132 (set_attr "prefix_rep" "1")
1133 (set_attr "mode" "SI")])
1134
;; cvttss2siq (64-bit only): truncating ("fix") conversion of element 0 of
;; the V4SF operand 1 (SSE register or memory) to a DImode integer.
1135 (define_insn "sse_cvttss2siq"
1136 [(set (match_operand:DI 0 "register_operand" "=r,r")
1137 (fix:DI
1138 (vec_select:SF
1139 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1140 (parallel [(const_int 0)]))))]
1141 "TARGET_SSE && TARGET_64BIT"
1142 "cvttss2siq\t{%1, %0|%0, %1}"
1143 [(set_attr "type" "sseicvt")
1144 (set_attr "athlon_decode" "double,vector")
1145 (set_attr "amdfam10_decode" "double,double")
1146 (set_attr "prefix_rep" "1")
1147 (set_attr "mode" "DI")])
1148
;; cvtdq2ps (SSE2): element-wise signed integer to float conversion,
;; V4SI -> V4SF.  The source may be an SSE register or memory.
1149 (define_insn "sse2_cvtdq2ps"
1150 [(set (match_operand:V4SF 0 "register_operand" "=x")
1151 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1152 "TARGET_SSE2"
1153 "cvtdq2ps\t{%1, %0|%0, %1}"
1154 [(set_attr "type" "ssecvt")
1155 (set_attr "mode" "V4SF")])
1156
;; cvtps2dq (SSE2): V4SF -> V4SI conversion modelled with
;; UNSPEC_FIX_NOTRUNC (not the truncating "fix" RTL code).
1157 (define_insn "sse2_cvtps2dq"
1158 [(set (match_operand:V4SI 0 "register_operand" "=x")
1159 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1160 UNSPEC_FIX_NOTRUNC))]
1161 "TARGET_SSE2"
1162 "cvtps2dq\t{%1, %0|%0, %1}"
1163 [(set_attr "type" "ssecvt")
1164 (set_attr "prefix_data16" "1")
1165 (set_attr "mode" "TI")])
1166
;; cvttps2dq (SSE2): element-wise truncating ("fix") conversion,
;; V4SF -> V4SI.  The source may be an SSE register or memory.
1167 (define_insn "sse2_cvttps2dq"
1168 [(set (match_operand:V4SI 0 "register_operand" "=x")
1169 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1170 "TARGET_SSE2"
1171 "cvttps2dq\t{%1, %0|%0, %1}"
1172 [(set_attr "type" "ssecvt")
1173 (set_attr "prefix_rep" "1")
1174 (set_attr "mode" "TI")])
1175
1176 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1177 ;;
1178 ;; Parallel single-precision floating point element swizzling
1179 ;;
1180 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1181
;; movhlps: result = { op2[2], op2[3], op1[2], op1[3] } (indices 6,7,2,3 of
;; the op1:op2 concatenation).  Operand 1 is always tied to the
;; destination.  Three alternatives:
;;   0: register <- register, movhlps;
;;   1: register <- offsettable memory, movlps loading the high half of
;;      operand 2 (%H2);
;;   2: memory destination, movhps storing the high half of register
;;      operand 2.
;; The condition rejects operands 1 and 2 both being memory.
1182 (define_insn "sse_movhlps"
1183 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1184 (vec_select:V4SF
1185 (vec_concat:V8SF
1186 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1187 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1188 (parallel [(const_int 6)
1189 (const_int 7)
1190 (const_int 2)
1191 (const_int 3)])))]
1192 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1193 "@
1194 movhlps\t{%2, %0|%0, %2}
1195 movlps\t{%H2, %0|%0, %H2}
1196 movhps\t{%2, %0|%0, %2}"
1197 [(set_attr "type" "ssemov")
1198 (set_attr "mode" "V4SF,V2SF,V2SF")])
1199
;; movlhps: result = { op1[0], op1[1], op2[0], op2[1] } (indices 0,1,4,5 of
;; the op1:op2 concatenation).  Operand 1 is always tied to the
;; destination.  Three alternatives:
;;   0: register <- register, movlhps;
;;   1: register <- memory, movhps loading into the high half;
;;   2: offsettable memory destination, movlps storing the low half of
;;      register operand 2 into the high half of the destination (%H0).
1200 (define_insn "sse_movlhps"
1201 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1202 (vec_select:V4SF
1203 (vec_concat:V8SF
1204 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1205 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1206 (parallel [(const_int 0)
1207 (const_int 1)
1208 (const_int 4)
1209 (const_int 5)])))]
1210 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1211 "@
1212 movlhps\t{%2, %0|%0, %2}
1213 movhps\t{%2, %0|%0, %2}
1214 movlps\t{%2, %H0|%H0, %2}"
1215 [(set_attr "type" "ssemov")
1216 (set_attr "mode" "V4SF,V2SF,V2SF")])
1217
;; unpckhps: interleave the high halves of the two inputs,
;; result = { op1[2], op2[2], op1[3], op2[3] } (indices 2,6,3,7 of the
;; op1:op2 concatenation).  Operand 1 is tied to the destination.
1218 (define_insn "sse_unpckhps"
1219 [(set (match_operand:V4SF 0 "register_operand" "=x")
1220 (vec_select:V4SF
1221 (vec_concat:V8SF
1222 (match_operand:V4SF 1 "register_operand" "0")
1223 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1224 (parallel [(const_int 2) (const_int 6)
1225 (const_int 3) (const_int 7)])))]
1226 "TARGET_SSE"
1227 "unpckhps\t{%2, %0|%0, %2}"
1228 [(set_attr "type" "sselog")
1229 (set_attr "mode" "V4SF")])
1230
;; unpcklps: interleave the low halves of the two inputs,
;; result = { op1[0], op2[0], op1[1], op2[1] } (indices 0,4,1,5 of the
;; op1:op2 concatenation).  Operand 1 is tied to the destination.
1231 (define_insn "sse_unpcklps"
1232 [(set (match_operand:V4SF 0 "register_operand" "=x")
1233 (vec_select:V4SF
1234 (vec_concat:V8SF
1235 (match_operand:V4SF 1 "register_operand" "0")
1236 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1237 (parallel [(const_int 0) (const_int 4)
1238 (const_int 1) (const_int 5)])))]
1239 "TARGET_SSE"
1240 "unpcklps\t{%2, %0|%0, %2}"
1241 [(set_attr "type" "sselog")
1242 (set_attr "mode" "V4SF")])
1243
1244 ;; These are modeled with the same vec_concat as the others so that we
1245 ;; capture users of shufps that can use the new instructions
;; movshdup (SSE3): duplicate the odd-numbered elements,
;; result = { op1[1], op1[1], op1[3], op1[3] }.  The source is concatenated
;; with itself (match_dup 1), so index 7 of the concat is element 3 of the
;; same operand.
1246 (define_insn "sse3_movshdup"
1247 [(set (match_operand:V4SF 0 "register_operand" "=x")
1248 (vec_select:V4SF
1249 (vec_concat:V8SF
1250 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1251 (match_dup 1))
1252 (parallel [(const_int 1)
1253 (const_int 1)
1254 (const_int 7)
1255 (const_int 7)])))]
1256 "TARGET_SSE3"
1257 "movshdup\t{%1, %0|%0, %1}"
1258 [(set_attr "type" "sse")
1259 (set_attr "prefix_rep" "1")
1260 (set_attr "mode" "V4SF")])
1261
;; movsldup (SSE3): duplicate the even-numbered elements,
;; result = { op1[0], op1[0], op1[2], op1[2] }.  The source is concatenated
;; with itself (match_dup 1), so index 6 of the concat is element 2 of the
;; same operand.
1262 (define_insn "sse3_movsldup"
1263 [(set (match_operand:V4SF 0 "register_operand" "=x")
1264 (vec_select:V4SF
1265 (vec_concat:V8SF
1266 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1267 (match_dup 1))
1268 (parallel [(const_int 0)
1269 (const_int 0)
1270 (const_int 6)
1271 (const_int 6)])))]
1272 "TARGET_SSE3"
1273 "movsldup\t{%1, %0|%0, %1}"
1274 [(set_attr "type" "sse")
1275 (set_attr "prefix_rep" "1")
1276 (set_attr "mode" "V4SF")])
1277
;; Expander for shufps taking the 8-bit immediate as operand 3.  The
;; immediate is split into four 2-bit selectors which are handed to
;; sse_shufps_1.  The two upper selectors are biased by 4 so that they
;; index into operand 2's half of the op1:op2 V8SF concatenation used by
;; that pattern.
1278 (define_expand "sse_shufps"
1279 [(match_operand:V4SF 0 "register_operand" "")
1280 (match_operand:V4SF 1 "register_operand" "")
1281 (match_operand:V4SF 2 "nonimmediate_operand" "")
1282 (match_operand:SI 3 "const_int_operand" "")]
1283 "TARGET_SSE"
1284 {
1285 int mask = INTVAL (operands[3]);
1286 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1287 GEN_INT ((mask >> 0) & 3),
1288 GEN_INT ((mask >> 2) & 3),
1289 GEN_INT (((mask >> 4) & 3) + 4),
1290 GEN_INT (((mask >> 6) & 3) + 4)));
1291 DONE;
1292 })
1293
;; shufps expressed as a vec_select from the op1:op2 concatenation.
;; Operands 3 and 4 select from operand 1 (indices 0..3), operands 5 and 6
;; from operand 2 (indices 4..7).  The output code rebuilds the 8-bit
;; immediate: two bits per selector, with the bias of 4 removed from the
;; upper two, and stores it back into operands[3] for the template.
1294 (define_insn "sse_shufps_1"
1295 [(set (match_operand:V4SF 0 "register_operand" "=x")
1296 (vec_select:V4SF
1297 (vec_concat:V8SF
1298 (match_operand:V4SF 1 "register_operand" "0")
1299 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1300 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1301 (match_operand 4 "const_0_to_3_operand" "")
1302 (match_operand 5 "const_4_to_7_operand" "")
1303 (match_operand 6 "const_4_to_7_operand" "")])))]
1304 "TARGET_SSE"
1305 {
1306 int mask = 0;
1307 mask |= INTVAL (operands[3]) << 0;
1308 mask |= INTVAL (operands[4]) << 2;
1309 mask |= (INTVAL (operands[5]) - 4) << 4;
1310 mask |= (INTVAL (operands[6]) - 4) << 6;
1311 operands[3] = GEN_INT (mask);
1312
1313 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1314 }
1315 [(set_attr "type" "sselog")
1316 (set_attr "mode" "V4SF")])
1317
;; Extract the high half (elements 2 and 3) of a V4SF as a V2SF.
;; Alternatives:
;;   0: store to memory from a register with movhps;
;;   1: register to register with movhlps;
;;   2: load from the high half of an offsettable memory source (%H1)
;;      with movlps.
1318 (define_insn "sse_storehps"
1319 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1320 (vec_select:V2SF
1321 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1322 (parallel [(const_int 2) (const_int 3)])))]
1323 "TARGET_SSE"
1324 "@
1325 movhps\t{%1, %0|%0, %1}
1326 movhlps\t{%1, %0|%0, %1}
1327 movlps\t{%H1, %0|%0, %H1}"
1328 [(set_attr "type" "ssemov")
1329 (set_attr "mode" "V2SF,V4SF,V2SF")])
1330
;; Replace the high half of a V4SF: result = { op1[0], op1[1], op2[0],
;; op2[1] }.  Operand 1 is tied to the destination.  Alternatives:
;;   0: V2SF operand 2 in memory, movhps;
;;   1: operand 2 in a register, movlhps;
;;   2: offsettable memory destination, movlps storing register operand 2
;;      into the high half of the destination (%H0).
1331 (define_insn "sse_loadhps"
1332 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1333 (vec_concat:V4SF
1334 (vec_select:V2SF
1335 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1336 (parallel [(const_int 0) (const_int 1)]))
1337 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1338 "TARGET_SSE"
1339 "@
1340 movhps\t{%2, %0|%0, %2}
1341 movlhps\t{%2, %0|%0, %2}
1342 movlps\t{%2, %H0|%H0, %2}"
1343 [(set_attr "type" "ssemov")
1344 (set_attr "mode" "V2SF,V4SF,V2SF")])
1345
;; Extract the low half (elements 0 and 1) of a V4SF as a V2SF.
;; Alternatives:
;;   0: store to memory from a register with movlps;
;;   1: register to register as a full movaps copy;
;;   2: load from memory with movlps.
1346 (define_insn "sse_storelps"
1347 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1348 (vec_select:V2SF
1349 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1350 (parallel [(const_int 0) (const_int 1)])))]
1351 "TARGET_SSE"
1352 "@
1353 movlps\t{%1, %0|%0, %1}
1354 movaps\t{%1, %0|%0, %1}
1355 movlps\t{%1, %0|%0, %1}"
1356 [(set_attr "type" "ssemov")
1357 (set_attr "mode" "V2SF,V4SF,V2SF")])
1358
;; Replace the low half of a V4SF: result = { op2[0], op2[1], op1[2],
;; op1[3] }.  Alternatives:
;;   0: operand 2 is already in the destination register and operand 1 is
;;      another register; shufps with immediate 0xe4 keeps the
;;      destination's low pair and takes the high pair from operand 1;
;;   1: operand 1 tied to the destination, operand 2 in memory, movlps
;;      load;
;;   2: memory destination tied to operand 1, movlps store of register
;;      operand 2.
1359 (define_insn "sse_loadlps"
1360 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1361 (vec_concat:V4SF
1362 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1363 (vec_select:V2SF
1364 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1365 (parallel [(const_int 2) (const_int 3)]))))]
1366 "TARGET_SSE"
1367 "@
1368 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1369 movlps\t{%2, %0|%0, %2}
1370 movlps\t{%2, %0|%0, %2}"
1371 [(set_attr "type" "sselog,ssemov,ssemov")
1372 (set_attr "mode" "V4SF,V2SF,V2SF")])
1373
;; movss register form: element 0 comes from operand 2 and the remaining
;; elements from operand 1 (vec_merge mask 1).  Operand 1 is tied to the
;; destination; both inputs must be SSE registers.
1374 (define_insn "sse_movss"
1375 [(set (match_operand:V4SF 0 "register_operand" "=x")
1376 (vec_merge:V4SF
1377 (match_operand:V4SF 2 "register_operand" "x")
1378 (match_operand:V4SF 1 "register_operand" "0")
1379 (const_int 1)))]
1380 "TARGET_SSE"
1381 "movss\t{%2, %0|%0, %2}"
1382 [(set_attr "type" "ssemov")
1383 (set_attr "mode" "SF")])
1384
;; Broadcast a scalar SF to all four V4SF elements.  The scalar must
;; already live in the destination register ("0"); shufps with immediate 0
;; then copies element 0 into every element.
1385 (define_insn "*vec_dupv4sf"
1386 [(set (match_operand:V4SF 0 "register_operand" "=x")
1387 (vec_duplicate:V4SF
1388 (match_operand:SF 1 "register_operand" "0")))]
1389 "TARGET_SSE"
1390 "shufps\t{$0, %0, %0|%0, %0, 0}"
1391 [(set_attr "type" "sselog1")
1392 (set_attr "mode" "V4SF")])
1393
1394 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1395 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1396 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF values.  Alternatives:
;;   0: SSE, operand 1 in the destination, operand 2 in a register,
;;      unpcklps;
;;   1: SSE, operand 1 in memory and operand 2 the constant zero ("C"),
;;      movss load;
;;   2: MMX, operand 1 in the destination, operand 2 in an MMX register,
;;      punpckldq;
;;   3: MMX, operand 1 in memory and operand 2 zero, movd load.
;; The MMX alternatives are marked "*" in their register constraints.
1397 (define_insn "*sse_concatv2sf"
1398 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1399 (vec_concat:V2SF
1400 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1401 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1402 "TARGET_SSE"
1403 "@
1404 unpcklps\t{%2, %0|%0, %2}
1405 movss\t{%1, %0|%0, %1}
1406 punpckldq\t{%2, %0|%0, %2}
1407 movd\t{%1, %0|%0, %1}"
1408 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1409 (set_attr "mode" "V4SF,SF,DI,DI")])
1410
;; Build a V4SF from two V2SF halves.  Operand 1 (the low half) is tied to
;; the destination; operand 2 (the high half) is inserted with movlhps
;; when it is in a register or with movhps when it is in memory.
1411 (define_insn "*sse_concatv4sf"
1412 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1413 (vec_concat:V4SF
1414 (match_operand:V2SF 1 "register_operand" " 0,0")
1415 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1416 "TARGET_SSE"
1417 "@
1418 movlhps\t{%2, %0|%0, %2}
1419 movhps\t{%2, %0|%0, %2}"
1420 [(set_attr "type" "ssemov")
1421 (set_attr "mode" "V4SF,V2SF")])
1422
;; Named expander for V4SF vector initialisation.  Operand 0 is the
;; destination vector and operand 1 the initialiser; the work is done by
;; ix86_expand_vector_init, called with `false' as its first argument.
1423 (define_expand "vec_initv4sf"
1424 [(match_operand:V4SF 0 "register_operand" "")
1425 (match_operand 1 "" "")]
1426 "TARGET_SSE"
1427 {
1428 ix86_expand_vector_init (false, operands[0], operands[1]);
1429 DONE;
1430 })
1431
;; Set element 0 of a V4SF from the scalar operand 2 (vec_merge mask 1);
;; the other elements come from operand 1.  Alternatives:
;;   0: operand 1 tied to the destination, scalar in an SSE register,
;;      movss;
;;   1: operand 1 is constant zero ("C"), scalar in memory, movss load;
;;   2: operand 1 is constant zero, scalar in a general register, movd
;;      (destination class "Yt");
;;   3: memory destination tied to operand 1; emitted as "#", i.e. the
;;      insn must be split (a define_split for this shape follows later
;;      in the file).
1432 (define_insn "vec_setv4sf_0"
1433 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Yt,m")
1434 (vec_merge:V4SF
1435 (vec_duplicate:V4SF
1436 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1437 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1438 (const_int 1)))]
1439 "TARGET_SSE"
1440 "@
1441 movss\t{%2, %0|%0, %2}
1442 movss\t{%2, %0|%0, %2}
1443 movd\t{%2, %0|%0, %2}
1444 #"
1445 [(set_attr "type" "ssemov")
1446 (set_attr "mode" "SF")])
1447
1448 ;; A subset is vec_setv4sf.
;; SSE4.1 insertion of scalar operand 2 into an arbitrary element of
;; operand 1.  Operand 3 is a one-hot vec_merge mask (1, 2, 4 or 8); the
;; output code turns it into the element index with exact_log2 and shifts
;; that index left by 4 to form the insertps immediate.
1449 (define_insn "*vec_setv4sf_sse4_1"
1450 [(set (match_operand:V4SF 0 "register_operand" "=x")
1451 (vec_merge:V4SF
1452 (vec_duplicate:V4SF
1453 (match_operand:SF 2 "nonimmediate_operand" "xm"))
1454 (match_operand:V4SF 1 "register_operand" "0")
1455 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
1456 "TARGET_SSE4_1"
1457 {
1458 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
1459 return "insertps\t{%3, %2, %0|%0, %2, %3}";
1460 }
1461 [(set_attr "type" "sselog")
1462 (set_attr "prefix_extra" "1")
1463 (set_attr "mode" "V4SF")])
1464
;; insertps (SSE4.1) with an explicit 8-bit immediate (operand 3).  The
;; operation is opaque to the optimisers (UNSPEC_INSERTPS).  Operand 1 is
;; tied to the destination and operand 2 is the register source.
;; The output template carries no trailing ';': in a machine description
;; that character only starts a comment and is not a statement terminator.
1465 (define_insn "sse4_1_insertps"
1466 [(set (match_operand:V4SF 0 "register_operand" "=x")
1467 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
1468 (match_operand:V4SF 1 "register_operand" "0")
1469 (match_operand:SI 3 "const_0_to_255_operand" "n")]
1470 UNSPEC_INSERTPS))]
1471 "TARGET_SSE4_1"
1472 "insertps\t{%3, %2, %0|%0, %2, %3}"
1473 [(set_attr "type" "sselog")
1474 (set_attr "prefix_extra" "1")
1475 (set_attr "mode" "V4SF")])
1476
;; After reload, setting element 0 of a V4SF that lives in memory (with
;; the remaining elements unchanged, match_dup 0) becomes a plain SFmode
;; move of operand 1 to offset 0 of that memory location.
1477 (define_split
1478 [(set (match_operand:V4SF 0 "memory_operand" "")
1479 (vec_merge:V4SF
1480 (vec_duplicate:V4SF
1481 (match_operand:SF 1 "nonmemory_operand" ""))
1482 (match_dup 0)
1483 (const_int 1)))]
1484 "TARGET_SSE && reload_completed"
1485 [(const_int 0)]
1486 {
1487 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
1488 DONE;
1489 })
1490
;; Named expander for setting one element of a V4SF.  Operand 0 is the
;; vector, operand 1 the new SF value and operand 2 the constant element
;; index; the work is done by ix86_expand_vector_set, called with `false'
;; as its first argument.
1491 (define_expand "vec_setv4sf"
1492 [(match_operand:V4SF 0 "register_operand" "")
1493 (match_operand:SF 1 "register_operand" "")
1494 (match_operand 2 "const_int_operand" "")]
1495 "TARGET_SSE"
1496 {
1497 ix86_expand_vector_set (false, operands[0], operands[1],
1498 INTVAL (operands[2]));
1499 DONE;
1500 })
1501
;; Extract element 0 of a V4SF.  Always emitted as "#" and split after
;; reload into an ordinary SFmode move: a register source is re-expressed
;; as the same hard register in SFmode, any other source via gen_lowpart.
;; The insn condition rejects source and destination both being memory.
1502 (define_insn_and_split "*vec_extractv4sf_0"
1503 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1504 (vec_select:SF
1505 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1506 (parallel [(const_int 0)])))]
1507 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1508 "#"
1509 "&& reload_completed"
1510 [(const_int 0)]
1511 {
1512 rtx op1 = operands[1];
1513 if (REG_P (op1))
1514 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1515 else
1516 op1 = gen_lowpart (SFmode, op1);
1517 emit_move_insn (operands[0], op1);
1518 DONE;
1519 })
1520
;; extractps (SSE4.1): copy the element of V4SF operand 1 selected by the
;; constant operand 2 (0..3) to a general register or to memory.
;; The destination predicate is nonimmediate_operand so that it agrees
;; with the "=rm" constraint; with register_operand the memory half of the
;; constraint could never be matched by the pattern recogniser.
1521 (define_insn "*sse4_1_extractps"
1522 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
1523 (vec_select:SF
1524 (match_operand:V4SF 1 "register_operand" "x")
1525 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
1526 "TARGET_SSE4_1"
1527 "extractps\t{%2, %1, %0|%0, %1, %2}"
1528 [(set_attr "type" "sselog")
1529 (set_attr "prefix_extra" "1")
1530 (set_attr "mode" "V4SF")])
1531
;; Named expander for extracting one element of a V4SF.  Operand 0 is the
;; SF destination, operand 1 the vector and operand 2 the constant element
;; index; the work is done by ix86_expand_vector_extract, called with
;; `false' as its first argument.
1532 (define_expand "vec_extractv4sf"
1533 [(match_operand:SF 0 "register_operand" "")
1534 (match_operand:V4SF 1 "register_operand" "")
1535 (match_operand 2 "const_int_operand" "")]
1536 "TARGET_SSE"
1537 {
1538 ix86_expand_vector_extract (false, operands[0], operands[1],
1539 INTVAL (operands[2]));
1540 DONE;
1541 })
1542
1543 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1544 ;;
1545 ;; Parallel double-precision floating point arithmetic
1546 ;;
1547 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1548
;; Named expander for V2DF negation (SSE2).  Code generation is delegated
;; entirely to ix86_expand_fp_absneg_operator; the RTL template itself is
;; not emitted because the preparation statement ends with DONE.
1549 (define_expand "negv2df2"
1550 [(set (match_operand:V2DF 0 "register_operand" "")
1551 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1552 "TARGET_SSE2"
1553 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
1554
;; Named expander for V2DF absolute value (SSE2).  Code generation is
;; delegated entirely to ix86_expand_fp_absneg_operator; the RTL template
;; itself is not emitted because the preparation statement ends with DONE.
1555 (define_expand "absv2df2"
1556 [(set (match_operand:V2DF 0 "register_operand" "")
1557 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1558 "TARGET_SSE2"
1559 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
1560
;; Named expander for V2DF addition (SSE2).  The operands are passed
;; through ix86_fixup_binary_operands_no_copy before the (plus:V2DF ...)
;; template is emitted.
1561 (define_expand "addv2df3"
1562 [(set (match_operand:V2DF 0 "register_operand" "")
1563 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1564 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1565 "TARGET_SSE2"
1566 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
1567
;; Matcher for V2DF addition, emitted as addpd.  Operand 1 is tied to the
;; destination and is commutative with operand 2 ("%0").
1568 (define_insn "*addv2df3"
1569 [(set (match_operand:V2DF 0 "register_operand" "=x")
1570 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1571 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1572 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1573 "addpd\t{%2, %0|%0, %2}"
1574 [(set_attr "type" "sseadd")
1575 (set_attr "mode" "V2DF")])
1576
;; addsd: scalar double add.  Element 0 of the result is op1[0] + op2[0];
;; element 1 is copied from operand 1 (vec_merge mask 1 with match_dup 1).
;; Operand 1 is tied to the destination.
;; The operator check is made in V2DFmode, the mode this pattern actually
;; operates on, like every other V2DF pattern in this section.
1577 (define_insn "sse2_vmaddv2df3"
1578 [(set (match_operand:V2DF 0 "register_operand" "=x")
1579 (vec_merge:V2DF
1580 (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1581 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1582 (match_dup 1)
1583 (const_int 1)))]
1584 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1585 "addsd\t{%2, %0|%0, %2}"
1586 [(set_attr "type" "sseadd")
1587 (set_attr "mode" "DF")])
1588
;; Named expander for V2DF subtraction (SSE2).  The operands are passed
;; through ix86_fixup_binary_operands_no_copy before the (minus:V2DF ...)
;; template is emitted.
1589 (define_expand "subv2df3"
1590 [(set (match_operand:V2DF 0 "register_operand" "")
1591 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1592 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1593 "TARGET_SSE2"
1594 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
1595
;; Matcher for V2DF subtraction, emitted as subpd.  Not commutative:
;; operand 1 (the minuend) must be the destination register.
1596 (define_insn "*subv2df3"
1597 [(set (match_operand:V2DF 0 "register_operand" "=x")
1598 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1599 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1600 "TARGET_SSE2"
1601 "subpd\t{%2, %0|%0, %2}"
1602 [(set_attr "type" "sseadd")
1603 (set_attr "mode" "V2DF")])
1604
;; subsd: scalar double subtract.  Element 0 of the result is
;; op1[0] - op2[0]; element 1 is copied from operand 1 (vec_merge mask 1
;; with match_dup 1).  Operand 1 is tied to the destination.
1605 (define_insn "sse2_vmsubv2df3"
1606 [(set (match_operand:V2DF 0 "register_operand" "=x")
1607 (vec_merge:V2DF
1608 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1609 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1610 (match_dup 1)
1611 (const_int 1)))]
1612 "TARGET_SSE2"
1613 "subsd\t{%2, %0|%0, %2}"
1614 [(set_attr "type" "sseadd")
1615 (set_attr "mode" "DF")])
1616
;; Named expander for V2DF multiplication (SSE2).  The operands are passed
;; through ix86_fixup_binary_operands_no_copy before the (mult:V2DF ...)
;; template is emitted.
1617 (define_expand "mulv2df3"
1618 [(set (match_operand:V2DF 0 "register_operand" "")
1619 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1620 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1621 "TARGET_SSE2"
1622 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
1623
;; Matcher for V2DF multiplication, emitted as mulpd.  Operand 1 is tied
;; to the destination and is commutative with operand 2 ("%0").
1624 (define_insn "*mulv2df3"
1625 [(set (match_operand:V2DF 0 "register_operand" "=x")
1626 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1627 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1628 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1629 "mulpd\t{%2, %0|%0, %2}"
1630 [(set_attr "type" "ssemul")
1631 (set_attr "mode" "V2DF")])
1632
;; mulsd: scalar double multiply.  Element 0 of the result is
;; op1[0] * op2[0]; element 1 is copied from operand 1 (vec_merge mask 1
;; with match_dup 1).  Operand 1 is tied to the destination.
1633 (define_insn "sse2_vmmulv2df3"
1634 [(set (match_operand:V2DF 0 "register_operand" "=x")
1635 (vec_merge:V2DF
1636 (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
1637 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1638 (match_dup 1)
1639 (const_int 1)))]
1640 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1641 "mulsd\t{%2, %0|%0, %2}"
1642 [(set_attr "type" "ssemul")
1643 (set_attr "mode" "DF")])
1644
;; Named expander for V2DF division (SSE2).  Unlike the add/mul expanders,
;; operand 1 (the dividend) is required to be a register.  The operands
;; are passed through ix86_fixup_binary_operands_no_copy before the
;; (div:V2DF ...) template is emitted.
1645 (define_expand "divv2df3"
1646 [(set (match_operand:V2DF 0 "register_operand" "")
1647 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1648 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1649 "TARGET_SSE2"
1650 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
1651
;; Matcher for V2DF division, emitted as divpd.  Not commutative:
;; operand 1 (the dividend) must be the destination register.
1652 (define_insn "*divv2df3"
1653 [(set (match_operand:V2DF 0 "register_operand" "=x")
1654 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1655 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1656 "TARGET_SSE2"
1657 "divpd\t{%2, %0|%0, %2}"
1658 [(set_attr "type" "ssediv")
1659 (set_attr "mode" "V2DF")])
1660
;; divsd: scalar double divide.  Element 0 of the result is
;; op1[0] / op2[0]; element 1 is copied from operand 1 (vec_merge mask 1
;; with match_dup 1).  Operand 1 is tied to the destination.
1661 (define_insn "sse2_vmdivv2df3"
1662 [(set (match_operand:V2DF 0 "register_operand" "=x")
1663 (vec_merge:V2DF
1664 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1665 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1666 (match_dup 1)
1667 (const_int 1)))]
1668 "TARGET_SSE2"
1669 "divsd\t{%2, %0|%0, %2}"
1670 [(set_attr "type" "ssediv")
1671 (set_attr "mode" "DF")])
1672
;; sqrtpd: element-wise square root of a V2DF.  The source may be an SSE
;; register or memory and is not tied to the destination.
1673 (define_insn "sqrtv2df2"
1674 [(set (match_operand:V2DF 0 "register_operand" "=x")
1675 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1676 "TARGET_SSE2"
1677 "sqrtpd\t{%1, %0|%0, %1}"
1678 [(set_attr "type" "sse")
1679 (set_attr "mode" "V2DF")])
1680
;; sqrtsd: scalar double square root.  Element 0 of the result is
;; sqrt (op1[0]); element 1 is copied from operand 2, which is tied to the
;; destination (vec_merge mask 1).
;; Operand 1 uses nonimmediate_operand so that the predicate agrees with
;; its "xm" constraint; with register_operand a memory source could never
;; be matched by the pattern recogniser even though the constraint and the
;; instruction allow one.
1681 (define_insn "sse2_vmsqrtv2df2"
1682 [(set (match_operand:V2DF 0 "register_operand" "=x")
1683 (vec_merge:V2DF
1684 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1685 (match_operand:V2DF 2 "register_operand" "0")
1686 (const_int 1)))]
1687 "TARGET_SSE2"
1688 "sqrtsd\t{%1, %0|%0, %1}"
1689 [(set_attr "type" "sse")
1690 (set_attr "mode" "DF")])
1691
1692 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1693 ;; isn't really correct, as those rtl operators aren't defined when
1694 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
1695
;; Named expander for V2DF signed maximum (SSE2).  Unless
;; flag_finite_math_only is set, operand 1 is first forced into a
;; register; the only matcher available in that case (*smaxv2df3) requires
;; a register for operand 1.  The operands then go through
;; ix86_fixup_binary_operands_no_copy.
1696 (define_expand "smaxv2df3"
1697 [(set (match_operand:V2DF 0 "register_operand" "")
1698 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1699 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1700 "TARGET_SSE2"
1701 {
1702 if (!flag_finite_math_only)
1703 operands[1] = force_reg (V2DFmode, operands[1]);
1704 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
1705 })
1706
;; maxpd matcher used only under flag_finite_math_only.  In that mode the
;; operation is treated as commutative ("%0"), so operand 1 may be swapped
;; with operand 2.
1707 (define_insn "*smaxv2df3_finite"
1708 [(set (match_operand:V2DF 0 "register_operand" "=x")
1709 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1710 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1711 "TARGET_SSE2 && flag_finite_math_only
1712 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1713 "maxpd\t{%2, %0|%0, %2}"
1714 [(set_attr "type" "sseadd")
1715 (set_attr "mode" "V2DF")])
1716
;; General maxpd matcher with a fixed operand order: there is no "%"
;; marker, and operand 1 must be the destination register.
1717 (define_insn "*smaxv2df3"
1718 [(set (match_operand:V2DF 0 "register_operand" "=x")
1719 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1720 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1721 "TARGET_SSE2"
1722 "maxpd\t{%2, %0|%0, %2}"
1723 [(set_attr "type" "sseadd")
1724 (set_attr "mode" "V2DF")])
1725
;; maxsd: scalar double maximum.  Element 0 of the result is the smax of
;; op1[0] and op2[0]; element 1 is copied from operand 1 (vec_merge mask 1
;; with match_dup 1).  Operand 1 is tied to the destination.
1726 (define_insn "sse2_vmsmaxv2df3"
1727 [(set (match_operand:V2DF 0 "register_operand" "=x")
1728 (vec_merge:V2DF
1729 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1730 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1731 (match_dup 1)
1732 (const_int 1)))]
1733 "TARGET_SSE2"
1734 "maxsd\t{%2, %0|%0, %2}"
1735 [(set_attr "type" "sseadd")
1736 (set_attr "mode" "DF")])
1737
;; Named expander for V2DF signed minimum (SSE2).  Unless
;; flag_finite_math_only is set, operand 1 is first forced into a
;; register; the only matcher available in that case (*sminv2df3) requires
;; a register for operand 1.  The operands then go through
;; ix86_fixup_binary_operands_no_copy.
1738 (define_expand "sminv2df3"
1739 [(set (match_operand:V2DF 0 "register_operand" "")
1740 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1741 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1742 "TARGET_SSE2"
1743 {
1744 if (!flag_finite_math_only)
1745 operands[1] = force_reg (V2DFmode, operands[1]);
1746 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
1747 })
1748
;; minpd matcher used only under flag_finite_math_only.  In that mode the
;; operation is treated as commutative ("%0"), so operand 1 may be swapped
;; with operand 2.
1749 (define_insn "*sminv2df3_finite"
1750 [(set (match_operand:V2DF 0 "register_operand" "=x")
1751 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1752 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1753 "TARGET_SSE2 && flag_finite_math_only
1754 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1755 "minpd\t{%2, %0|%0, %2}"
1756 [(set_attr "type" "sseadd")
1757 (set_attr "mode" "V2DF")])
1758
;; General minpd matcher with a fixed operand order: there is no "%"
;; marker, and operand 1 must be the destination register.
1759 (define_insn "*sminv2df3"
1760 [(set (match_operand:V2DF 0 "register_operand" "=x")
1761 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1762 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1763 "TARGET_SSE2"
1764 "minpd\t{%2, %0|%0, %2}"
1765 [(set_attr "type" "sseadd")
1766 (set_attr "mode" "V2DF")])
1767
;; minsd: scalar double minimum.  Element 0 of the result is the smin of
;; op1[0] and op2[0]; element 1 is copied from operand 1 (vec_merge mask 1
;; with match_dup 1).  Operand 1 is tied to the destination.
1768 (define_insn "sse2_vmsminv2df3"
1769 [(set (match_operand:V2DF 0 "register_operand" "=x")
1770 (vec_merge:V2DF
1771 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1772 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1773 (match_dup 1)
1774 (const_int 1)))]
1775 "TARGET_SSE2"
1776 "minsd\t{%2, %0|%0, %2}"
1777 [(set_attr "type" "sseadd")
1778 (set_attr "mode" "DF")])
1779
;; addsubpd (SSE3), modelled as a vec_merge with mask 1 of (op1 + op2) and
;; (op1 - op2): per the RTL, element 0 is taken from the sum and element 1
;; from the difference.  Operand 1 is tied to the destination.
;; NOTE(review): confirm the element assignment against the ISA
;; description of addsubpd.
1780 (define_insn "sse3_addsubv2df3"
1781 [(set (match_operand:V2DF 0 "register_operand" "=x")
1782 (vec_merge:V2DF
1783 (plus:V2DF
1784 (match_operand:V2DF 1 "register_operand" "0")
1785 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1786 (minus:V2DF (match_dup 1) (match_dup 2))
1787 (const_int 1)))]
1788 "TARGET_SSE3"
1789 "addsubpd\t{%2, %0|%0, %2}"
1790 [(set_attr "type" "sseadd")
1791 (set_attr "mode" "V2DF")])
1792
;; haddpd (SSE3): horizontal add,
;; result = { op1[0] + op1[1], op2[0] + op2[1] }.
;; Operand 1 is tied to the destination.
1793 (define_insn "sse3_haddv2df3"
1794 [(set (match_operand:V2DF 0 "register_operand" "=x")
1795 (vec_concat:V2DF
1796 (plus:DF
1797 (vec_select:DF
1798 (match_operand:V2DF 1 "register_operand" "0")
1799 (parallel [(const_int 0)]))
1800 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1801 (plus:DF
1802 (vec_select:DF
1803 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1804 (parallel [(const_int 0)]))
1805 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1806 "TARGET_SSE3"
1807 "haddpd\t{%2, %0|%0, %2}"
1808 [(set_attr "type" "sseadd")
1809 (set_attr "mode" "V2DF")])
1810
;; hsubpd (SSE3): horizontal subtract,
;; result = { op1[0] - op1[1], op2[0] - op2[1] }.
;; Operand 1 is tied to the destination.
1811 (define_insn "sse3_hsubv2df3"
1812 [(set (match_operand:V2DF 0 "register_operand" "=x")
1813 (vec_concat:V2DF
1814 (minus:DF
1815 (vec_select:DF
1816 (match_operand:V2DF 1 "register_operand" "0")
1817 (parallel [(const_int 0)]))
1818 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1819 (minus:DF
1820 (vec_select:DF
1821 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1822 (parallel [(const_int 0)]))
1823 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1824 "TARGET_SSE3"
1825 "hsubpd\t{%2, %0|%0, %2}"
1826 [(set_attr "type" "sseadd")
1827 (set_attr "mode" "V2DF")])
1828
;; Named expander for the sum reduction of a V2DF (SSE3).  It emits
;; sse3_haddv2df3 with operand 1 as both inputs, so both result elements
;; hold op1[0] + op1[1].
1829 (define_expand "reduc_splus_v2df"
1830 [(match_operand:V2DF 0 "register_operand" "")
1831 (match_operand:V2DF 1 "register_operand" "")]
1832 "TARGET_SSE3"
1833 {
1834 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1835 DONE;
1836 })
1837
1838 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1839 ;;
1840 ;; Parallel double-precision floating point comparisons
1841 ;;
1842 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1843
;; Packed V2DF comparison.  Operator 3 (an sse_comparison_operator) supplies
;; the condition; it is printed through %D3 inside the cmp..pd mnemonic.
1844 (define_insn "sse2_maskcmpv2df3"
1845 [(set (match_operand:V2DF 0 "register_operand" "=x")
1846 (match_operator:V2DF 3 "sse_comparison_operator"
1847 [(match_operand:V2DF 1 "register_operand" "0")
1848 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1849 "TARGET_SSE2"
1850 "cmp%D3pd\t{%2, %0|%0, %2}"
1851 [(set_attr "type" "ssecmp")
1852 (set_attr "mode" "V2DF")])
1853
;; Scalar DF comparison producing a DF result in an SSE register.  The
;; condition comes from operator 3 and is printed via %D3 (cmp..sd).
1854 (define_insn "sse2_maskcmpdf3"
1855 [(set (match_operand:DF 0 "register_operand" "=x")
1856 (match_operator:DF 3 "sse_comparison_operator"
1857 [(match_operand:DF 1 "register_operand" "0")
1858 (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
1859 "TARGET_SSE2"
1860 "cmp%D3sd\t{%2, %0|%0, %2}"
1861 [(set_attr "type" "ssecmp")
1862 (set_attr "mode" "DF")])
1863
;; Scalar comparison expressed on V2DF: the vec_merge mask of 1 takes
;; element 0 from the comparison result and element 1 from operand 1.
1864 (define_insn "sse2_vmmaskcmpv2df3"
1865 [(set (match_operand:V2DF 0 "register_operand" "=x")
1866 (vec_merge:V2DF
1867 (match_operator:V2DF 3 "sse_comparison_operator"
1868 [(match_operand:V2DF 1 "register_operand" "0")
1869 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1870 (match_dup 1)
1871 (const_int 1)))]
1872 "TARGET_SSE2"
1873 "cmp%D3sd\t{%2, %0|%0, %2}"
1874 [(set_attr "type" "ssecmp")
1875 (set_attr "mode" "DF")])
1876
;; Compare element 0 of operand 0 with element 0 of operand 1 and set
;; FLAGS_REG in CCFP mode; emitted as comisd.
1877 (define_insn "sse2_comi"
1878 [(set (reg:CCFP FLAGS_REG)
1879 (compare:CCFP
1880 (vec_select:DF
1881 (match_operand:V2DF 0 "register_operand" "x")
1882 (parallel [(const_int 0)]))
1883 (vec_select:DF
1884 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1885 (parallel [(const_int 0)]))))]
1886 "TARGET_SSE2"
1887 "comisd\t{%1, %0|%0, %1}"
1888 [(set_attr "type" "ssecomi")
1889 (set_attr "mode" "DF")])
1890
;; Same operand shape as sse2_comi, but the flags are set in CCFPU mode and
;; the instruction emitted is ucomisd.
1891 (define_insn "sse2_ucomi"
1892 [(set (reg:CCFPU FLAGS_REG)
1893 (compare:CCFPU
1894 (vec_select:DF
1895 (match_operand:V2DF 0 "register_operand" "x")
1896 (parallel [(const_int 0)]))
1897 (vec_select:DF
1898 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1899 (parallel [(const_int 0)]))))]
1900 "TARGET_SSE2"
1901 "ucomisd\t{%1, %0|%0, %1}"
1902 [(set_attr "type" "ssecomi")
1903 (set_attr "mode" "DF")])
1904
;; V2DF vector conditional select: op0 = (op4 <op3> op5) ? op1 : op2.
;; All of the expansion is done by ix86_expand_fp_vcond; the expander FAILs
;; when that function returns false.
1905 (define_expand "vcondv2df"
1906 [(set (match_operand:V2DF 0 "register_operand" "")
1907 (if_then_else:V2DF
1908 (match_operator 3 ""
1909 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1910 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1911 (match_operand:V2DF 1 "general_operand" "")
1912 (match_operand:V2DF 2 "general_operand" "")))]
1913 "TARGET_SSE2"
1914 {
1915 if (ix86_expand_fp_vcond (operands))
1916 DONE;
1917 else
1918 FAIL;
1919 })
1920
1921 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1922 ;;
1923 ;; Parallel double-precision floating point logical operations
1924 ;;
1925 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1926
;; Named expander for V2DF bitwise AND.  The preparation statement runs
;; ix86_fixup_binary_operands_no_copy on the operands before the pattern
;; is emitted.
1927 (define_expand "andv2df3"
1928 [(set (match_operand:V2DF 0 "register_operand" "")
1929 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1930 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1931 "TARGET_SSE2"
1932 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
1933
;; Insn matching V2DF AND (andpd).  Operands 1 and 2 are marked commutative
;; ("%") and operand 1 is tied to the destination; the operand combination
;; must also satisfy ix86_binary_operator_ok.
1934 (define_insn "*andv2df3"
1935 [(set (match_operand:V2DF 0 "register_operand" "=x")
1936 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1937 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1938 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1939 "andpd\t{%2, %0|%0, %2}"
1940 [(set_attr "type" "sselog")
1941 (set_attr "mode" "V2DF")])
1942
;; V2DF and-not: op0 = (~op1) & op2, emitted as andnpd.  The complemented
;; operand 1 is the one tied to the destination.
1943 (define_insn "sse2_nandv2df3"
1944 [(set (match_operand:V2DF 0 "register_operand" "=x")
1945 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1946 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1947 "TARGET_SSE2"
1948 "andnpd\t{%2, %0|%0, %2}"
1949 [(set_attr "type" "sselog")
1950 (set_attr "mode" "V2DF")])
1951
;; Named expander for V2DF bitwise inclusive OR.  The preparation statement
;; runs ix86_fixup_binary_operands_no_copy on the operands.
1952 (define_expand "iorv2df3"
1953 [(set (match_operand:V2DF 0 "register_operand" "")
1954 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1955 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1956 "TARGET_SSE2"
1957 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
1958
;; Insn matching V2DF inclusive OR (orpd).  Commutative operands, operand 1
;; tied to the destination, gated by ix86_binary_operator_ok.
1959 (define_insn "*iorv2df3"
1960 [(set (match_operand:V2DF 0 "register_operand" "=x")
1961 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1962 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1963 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1964 "orpd\t{%2, %0|%0, %2}"
1965 [(set_attr "type" "sselog")
1966 (set_attr "mode" "V2DF")])
1967
;; Named expander for V2DF bitwise exclusive OR.  The preparation statement
;; runs ix86_fixup_binary_operands_no_copy on the operands.
1968 (define_expand "xorv2df3"
1969 [(set (match_operand:V2DF 0 "register_operand" "")
1970 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1971 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1972 "TARGET_SSE2"
1973 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
1974
;; Insn matching V2DF exclusive OR (xorpd).  Commutative operands, operand 1
;; tied to the destination, gated by ix86_binary_operator_ok.
1975 (define_insn "*xorv2df3"
1976 [(set (match_operand:V2DF 0 "register_operand" "=x")
1977 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1978 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1979 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1980 "xorpd\t{%2, %0|%0, %2}"
1981 [(set_attr "type" "sselog")
1982 (set_attr "mode" "V2DF")])
1983
1984 ;; Also define scalar versions. These are used for abs, neg, and
1985 ;; conditional move. Using subregs into vector modes causes register
1986 ;; allocation lossage. These patterns do not allow memory operands
1987 ;; because the native instructions read the full 128 bits.
1988
;; Scalar DF bitwise AND on SSE registers only (no memory alternative).
;; The packed andpd instruction is emitted, hence the V2DF mode attribute.
1989 (define_insn "*anddf3"
1990 [(set (match_operand:DF 0 "register_operand" "=x")
1991 (and:DF (match_operand:DF 1 "register_operand" "0")
1992 (match_operand:DF 2 "register_operand" "x")))]
1993 "TARGET_SSE2"
1994 "andpd\t{%2, %0|%0, %2}"
1995 [(set_attr "type" "sselog")
1996 (set_attr "mode" "V2DF")])
1997
;; Scalar DF and-not, op0 = (~op1) & op2, on SSE registers only.  Emitted as
;; the packed andnpd instruction.
1998 (define_insn "*nanddf3"
1999 [(set (match_operand:DF 0 "register_operand" "=x")
2000 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
2001 (match_operand:DF 2 "register_operand" "x")))]
2002 "TARGET_SSE2"
2003 "andnpd\t{%2, %0|%0, %2}"
2004 [(set_attr "type" "sselog")
2005 (set_attr "mode" "V2DF")])
2006
;; Scalar DF bitwise inclusive OR on SSE registers only.  Emitted as the
;; packed orpd instruction.
2007 (define_insn "*iordf3"
2008 [(set (match_operand:DF 0 "register_operand" "=x")
2009 (ior:DF (match_operand:DF 1 "register_operand" "0")
2010 (match_operand:DF 2 "register_operand" "x")))]
2011 "TARGET_SSE2"
2012 "orpd\t{%2, %0|%0, %2}"
2013 [(set_attr "type" "sselog")
2014 (set_attr "mode" "V2DF")])
2015
;; Scalar DF bitwise exclusive OR on SSE registers only.  Emitted as the
;; packed xorpd instruction.
2016 (define_insn "*xordf3"
2017 [(set (match_operand:DF 0 "register_operand" "=x")
2018 (xor:DF (match_operand:DF 1 "register_operand" "0")
2019 (match_operand:DF 2 "register_operand" "x")))]
2020 "TARGET_SSE2"
2021 "xorpd\t{%2, %0|%0, %2}"
2022 [(set_attr "type" "sselog")
2023 (set_attr "mode" "V2DF")])
2024
2025 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2026 ;;
2027 ;; Parallel double-precision floating point conversion operations
2028 ;;
2029 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2030
;; Convert a V2SI vector to V2DF (cvtpi2pd).  The source is either a "y"
;; register (alternative 0, unit "mmx") or memory (alternative 1).
2031 (define_insn "sse2_cvtpi2pd"
2032 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2033 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2034 "TARGET_SSE2"
2035 "cvtpi2pd\t{%1, %0|%0, %1}"
2036 [(set_attr "type" "ssecvt")
2037 (set_attr "unit" "mmx,*")
2038 (set_attr "mode" "V2DF")])
2039
;; Convert V2DF to V2SI in a "y" register (cvtpd2pi).  The conversion is an
;; UNSPEC_FIX_NOTRUNC rather than a plain (truncating) fix.
2040 (define_insn "sse2_cvtpd2pi"
2041 [(set (match_operand:V2SI 0 "register_operand" "=y")
2042 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2043 UNSPEC_FIX_NOTRUNC))]
2044 "TARGET_SSE2"
2045 "cvtpd2pi\t{%1, %0|%0, %1}"
2046 [(set_attr "type" "ssecvt")
2047 (set_attr "unit" "mmx")
2048 (set_attr "prefix_data16" "1")
2049 (set_attr "mode" "DI")])
2050
;; Truncating conversion (RTL fix) of V2DF to V2SI in a "y" register,
;; emitted as cvttpd2pi.
2051 (define_insn "sse2_cvttpd2pi"
2052 [(set (match_operand:V2SI 0 "register_operand" "=y")
2053 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2054 "TARGET_SSE2"
2055 "cvttpd2pi\t{%1, %0|%0, %1}"
2056 [(set_attr "type" "ssecvt")
2057 (set_attr "unit" "mmx")
2058 (set_attr "prefix_data16" "1")
2059 (set_attr "mode" "TI")])
2060
;; Convert SI operand 2 (register or memory) to DF and place it in element 0
;; of the result; element 1 is taken from operand 1, which is tied to the
;; destination.
2061 (define_insn "sse2_cvtsi2sd"
2062 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2063 (vec_merge:V2DF
2064 (vec_duplicate:V2DF
2065 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2066 (match_operand:V2DF 1 "register_operand" "0,0")
2067 (const_int 1)))]
2068 "TARGET_SSE2"
2069 "cvtsi2sd\t{%2, %0|%0, %2}"
2070 [(set_attr "type" "sseicvt")
2071 (set_attr "mode" "DF")
2072 (set_attr "athlon_decode" "double,direct")
2073 (set_attr "amdfam10_decode" "vector,double")])
2074
;; DImode variant of sse2_cvtsi2sd: converts DI operand 2 to DF in element 0
;; and keeps element 1 of operand 1.  Only enabled for TARGET_64BIT.
2075 (define_insn "sse2_cvtsi2sdq"
2076 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2077 (vec_merge:V2DF
2078 (vec_duplicate:V2DF
2079 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2080 (match_operand:V2DF 1 "register_operand" "0,0")
2081 (const_int 1)))]
2082 "TARGET_SSE2 && TARGET_64BIT"
2083 "cvtsi2sdq\t{%2, %0|%0, %2}"
2084 [(set_attr "type" "sseicvt")
2085 (set_attr "mode" "DF")
2086 (set_attr "athlon_decode" "double,direct")
2087 (set_attr "amdfam10_decode" "vector,double")])
2088
;; Convert element 0 of a V2DF operand to SI via UNSPEC_FIX_NOTRUNC
;; (cvtsd2si).
;; NOTE(review): unlike sse2_cvtsd2si_2 this pattern sets no amdfam10_decode
;; attribute -- confirm whether that is intentional.
2089 (define_insn "sse2_cvtsd2si"
2090 [(set (match_operand:SI 0 "register_operand" "=r,r")
2091 (unspec:SI
2092 [(vec_select:DF
2093 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2094 (parallel [(const_int 0)]))]
2095 UNSPEC_FIX_NOTRUNC))]
2096 "TARGET_SSE2"
2097 "cvtsd2si\t{%1, %0|%0, %1}"
2098 [(set_attr "type" "sseicvt")
2099 (set_attr "athlon_decode" "double,vector")
2100 (set_attr "prefix_rep" "1")
2101 (set_attr "mode" "SI")])
2102
;; Same conversion as sse2_cvtsd2si, but the source is a plain DF operand
;; rather than element 0 of a V2DF vector.
2103 (define_insn "sse2_cvtsd2si_2"
2104 [(set (match_operand:SI 0 "register_operand" "=r,r")
2105 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2106 UNSPEC_FIX_NOTRUNC))]
2107 "TARGET_SSE2"
2108 "cvtsd2si\t{%1, %0|%0, %1}"
2109 [(set_attr "type" "sseicvt")
2110 (set_attr "athlon_decode" "double,vector")
2111 (set_attr "amdfam10_decode" "double,double")
2112 (set_attr "prefix_rep" "1")
2113 (set_attr "mode" "SI")])
2114
;; Convert element 0 of a V2DF operand to DI via UNSPEC_FIX_NOTRUNC
;; (cvtsd2siq).  Only enabled for TARGET_64BIT.
;; NOTE(review): no amdfam10_decode attribute here, unlike
;; sse2_cvtsd2siq_2 -- confirm whether that is intentional.
2115 (define_insn "sse2_cvtsd2siq"
2116 [(set (match_operand:DI 0 "register_operand" "=r,r")
2117 (unspec:DI
2118 [(vec_select:DF
2119 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2120 (parallel [(const_int 0)]))]
2121 UNSPEC_FIX_NOTRUNC))]
2122 "TARGET_SSE2 && TARGET_64BIT"
2123 "cvtsd2siq\t{%1, %0|%0, %1}"
2124 [(set_attr "type" "sseicvt")
2125 (set_attr "athlon_decode" "double,vector")
2126 (set_attr "prefix_rep" "1")
2127 (set_attr "mode" "DI")])
2128
;; Same conversion as sse2_cvtsd2siq, but the source is a plain DF operand.
;; Only enabled for TARGET_64BIT.
2129 (define_insn "sse2_cvtsd2siq_2"
2130 [(set (match_operand:DI 0 "register_operand" "=r,r")
2131 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2132 UNSPEC_FIX_NOTRUNC))]
2133 "TARGET_SSE2 && TARGET_64BIT"
2134 "cvtsd2siq\t{%1, %0|%0, %1}"
2135 [(set_attr "type" "sseicvt")
2136 (set_attr "athlon_decode" "double,vector")
2137 (set_attr "amdfam10_decode" "double,double")
2138 (set_attr "prefix_rep" "1")
2139 (set_attr "mode" "DI")])
2140
;; Truncating conversion (RTL fix) of element 0 of a V2DF operand to SI,
;; emitted as cvttsd2si.
2141 (define_insn "sse2_cvttsd2si"
2142 [(set (match_operand:SI 0 "register_operand" "=r,r")
2143 (fix:SI
2144 (vec_select:DF
2145 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2146 (parallel [(const_int 0)]))))]
2147 "TARGET_SSE2"
2148 "cvttsd2si\t{%1, %0|%0, %1}"
2149 [(set_attr "type" "sseicvt")
2150 (set_attr "prefix_rep" "1")
2151 (set_attr "mode" "SI")
2152 (set_attr "athlon_decode" "double,vector")
2153 (set_attr "amdfam10_decode" "double,double")])
2154
;; Truncating conversion (RTL fix) of element 0 of a V2DF operand to DI,
;; emitted as cvttsd2siq.  Only enabled for TARGET_64BIT.
2155 (define_insn "sse2_cvttsd2siq"
2156 [(set (match_operand:DI 0 "register_operand" "=r,r")
2157 (fix:DI
2158 (vec_select:DF
2159 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2160 (parallel [(const_int 0)]))))]
2161 "TARGET_SSE2 && TARGET_64BIT"
2162 "cvttsd2siq\t{%1, %0|%0, %1}"
2163 [(set_attr "type" "sseicvt")
2164 (set_attr "prefix_rep" "1")
2165 (set_attr "mode" "DI")
2166 (set_attr "athlon_decode" "double,vector")
2167 (set_attr "amdfam10_decode" "double,double")])
2168
;; Convert elements 0 and 1 of a V4SI operand to a V2DF vector (cvtdq2pd).
2169 (define_insn "sse2_cvtdq2pd"
2170 [(set (match_operand:V2DF 0 "register_operand" "=x")
2171 (float:V2DF
2172 (vec_select:V2SI
2173 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2174 (parallel [(const_int 0) (const_int 1)]))))]
2175 "TARGET_SSE2"
2176 "cvtdq2pd\t{%1, %0|%0, %1}"
2177 [(set_attr "type" "ssecvt")
2178 (set_attr "mode" "V2DF")])
2179
;; Expander for V2DF -> V4SI conversion via UNSPEC_FIX_NOTRUNC.  Operand 2
;; is set to the V2SI zero constant, so the upper two result elements are
;; zero.
2180 (define_expand "sse2_cvtpd2dq"
2181 [(set (match_operand:V4SI 0 "register_operand" "")
2182 (vec_concat:V4SI
2183 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2184 UNSPEC_FIX_NOTRUNC)
2185 (match_dup 2)))]
2186 "TARGET_SSE2"
2187 "operands[2] = CONST0_RTX (V2SImode);")
2188
;; Insn for the pattern built by the sse2_cvtpd2dq expander: the converted
;; V2SI half concatenated with a constant-zero V2SI half (cvtpd2dq).
2189 (define_insn "*sse2_cvtpd2dq"
2190 [(set (match_operand:V4SI 0 "register_operand" "=x")
2191 (vec_concat:V4SI
2192 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2193 UNSPEC_FIX_NOTRUNC)
2194 (match_operand:V2SI 2 "const0_operand" "")))]
2195 "TARGET_SSE2"
2196 "cvtpd2dq\t{%1, %0|%0, %1}"
2197 [(set_attr "type" "ssecvt")
2198 (set_attr "prefix_rep" "1")
2199 (set_attr "mode" "TI")
2200 (set_attr "amdfam10_decode" "double")])
2201
;; Expander for truncating V2DF -> V4SI conversion.  Operand 2 is set to the
;; V2SI zero constant, so the upper two result elements are zero.
2202 (define_expand "sse2_cvttpd2dq"
2203 [(set (match_operand:V4SI 0 "register_operand" "")
2204 (vec_concat:V4SI
2205 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2206 (match_dup 2)))]
2207 "TARGET_SSE2"
2208 "operands[2] = CONST0_RTX (V2SImode);")
2209
;; Insn for the pattern built by the sse2_cvttpd2dq expander: the truncated
;; V2SI half concatenated with a constant-zero V2SI half (cvttpd2dq).
2210 (define_insn "*sse2_cvttpd2dq"
2211 [(set (match_operand:V4SI 0 "register_operand" "=x")
2212 (vec_concat:V4SI
2213 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2214 (match_operand:V2SI 2 "const0_operand" "")))]
2215 "TARGET_SSE2"
2216 "cvttpd2dq\t{%1, %0|%0, %1}"
2217 [(set_attr "type" "ssecvt")
2218 (set_attr "prefix_rep" "1")
2219 (set_attr "mode" "TI")
2220 (set_attr "amdfam10_decode" "double")])
2221
;; cvtsd2ss: element 0 of the V4SF result comes from the float_truncate of
;; operand 2; the remaining elements come from operand 1, which is tied to
;; the destination (vec_merge mask 1).
2222 (define_insn "sse2_cvtsd2ss"
2223 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2224 (vec_merge:V4SF
2225 (vec_duplicate:V4SF
2226 (float_truncate:V2SF
2227 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2228 (match_operand:V4SF 1 "register_operand" "0,0")
2229 (const_int 1)))]
2230 "TARGET_SSE2"
2231 "cvtsd2ss\t{%2, %0|%0, %2}"
2232 [(set_attr "type" "ssecvt")
2233 (set_attr "athlon_decode" "vector,double")
2234 (set_attr "amdfam10_decode" "vector,double")
2235 (set_attr "mode" "SF")])
2236
;; cvtss2sd: element 0 of the V2DF result comes from the float_extend of the
;; low elements of operand 2; element 1 comes from operand 1, which is tied
;; to the destination (vec_merge mask 1).
2237 (define_insn "sse2_cvtss2sd"
2238 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2239 (vec_merge:V2DF
2240 (float_extend:V2DF
2241 (vec_select:V2SF
2242 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2243 (parallel [(const_int 0) (const_int 1)])))
2244 (match_operand:V2DF 1 "register_operand" "0,0")
2245 (const_int 1)))]
2246 "TARGET_SSE2"
2247 "cvtss2sd\t{%2, %0|%0, %2}"
2248 [(set_attr "type" "ssecvt")
2249 (set_attr "amdfam10_decode" "vector,double")
2250 (set_attr "mode" "DF")])
2251
;; Expander for V2DF -> V4SF narrowing.  Operand 2 is set to the V2SF zero
;; constant, so the upper two result elements are zero.
2252 (define_expand "sse2_cvtpd2ps"
2253 [(set (match_operand:V4SF 0 "register_operand" "")
2254 (vec_concat:V4SF
2255 (float_truncate:V2SF
2256 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2257 (match_dup 2)))]
2258 "TARGET_SSE2"
2259 "operands[2] = CONST0_RTX (V2SFmode);")
2260
;; Insn for the pattern built by the sse2_cvtpd2ps expander: the narrowed
;; V2SF half concatenated with a constant-zero V2SF half (cvtpd2ps).
2261 (define_insn "*sse2_cvtpd2ps"
2262 [(set (match_operand:V4SF 0 "register_operand" "=x")
2263 (vec_concat:V4SF
2264 (float_truncate:V2SF
2265 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2266 (match_operand:V2SF 2 "const0_operand" "")))]
2267 "TARGET_SSE2"
2268 "cvtpd2ps\t{%1, %0|%0, %1}"
2269 [(set_attr "type" "ssecvt")
2270 (set_attr "prefix_data16" "1")
2271 (set_attr "mode" "V4SF")
2272 (set_attr "amdfam10_decode" "double")])
2273
;; Widen elements 0 and 1 of a V4SF operand to a V2DF vector (cvtps2pd).
2274 (define_insn "sse2_cvtps2pd"
2275 [(set (match_operand:V2DF 0 "register_operand" "=x")
2276 (float_extend:V2DF
2277 (vec_select:V2SF
2278 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2279 (parallel [(const_int 0) (const_int 1)]))))]
2280 "TARGET_SSE2"
2281 "cvtps2pd\t{%1, %0|%0, %1}"
2282 [(set_attr "type" "ssecvt")
2283 (set_attr "mode" "V2DF")
2284 (set_attr "amdfam10_decode" "direct")])
2285
;; Widen the high two elements of V4SF operand 1 to V2DF, in two steps.
;; Step 1 fills a fresh V4SF temporary (operand 2): its elements 0,1 are
;; elements 2,3 of operand 1 (indices 6,7 of the concat); its elements 2,3
;; are the temporary's own previous contents, which step 2 does not read.
;; Step 2 float_extends elements 0,1 of the temporary.
2286 (define_expand "vec_unpacks_hi_v4sf"
2287 [(set (match_dup 2)
2288 (vec_select:V4SF
2289 (vec_concat:V8SF
2290 (match_dup 2)
2291 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2292 (parallel [(const_int 6)
2293 (const_int 7)
2294 (const_int 2)
2295 (const_int 3)])))
2296 (set (match_operand:V2DF 0 "register_operand" "")
2297 (float_extend:V2DF
2298 (vec_select:V2SF
2299 (match_dup 2)
2300 (parallel [(const_int 0) (const_int 1)]))))]
2301 "TARGET_SSE2"
2302 {
2303 operands[2] = gen_reg_rtx (V4SFmode);
2304 })
2305
;; Widen the low two elements of V4SF operand 1 to V2DF.  The generated RTL
;; has the same shape as the sse2_cvtps2pd pattern.
2306 (define_expand "vec_unpacks_lo_v4sf"
2307 [(set (match_operand:V2DF 0 "register_operand" "")
2308 (float_extend:V2DF
2309 (vec_select:V2SF
2310 (match_operand:V4SF 1 "nonimmediate_operand" "")
2311 (parallel [(const_int 0) (const_int 1)]))))]
2312 "TARGET_SSE2")
2313
;; V8HI -> V4SF, high half, signed.  Unpacks into a V4SI temporary with
;; gen_vec_unpacks_hi_v8hi, then converts it with gen_sse2_cvtdq2ps.
2314 (define_expand "vec_unpacks_float_hi_v8hi"
2315 [(match_operand:V4SF 0 "register_operand" "")
2316 (match_operand:V8HI 1 "register_operand" "")]
2317 "TARGET_SSE2"
2318 {
2319 rtx tmp = gen_reg_rtx (V4SImode);
2320
2321 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2322 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
2323 DONE;
2324 })
2325
;; V8HI -> V4SF, low half, signed.  Unpacks into a V4SI temporary with
;; gen_vec_unpacks_lo_v8hi, then converts it with gen_sse2_cvtdq2ps.
2326 (define_expand "vec_unpacks_float_lo_v8hi"
2327 [(match_operand:V4SF 0 "register_operand" "")
2328 (match_operand:V8HI 1 "register_operand" "")]
2329 "TARGET_SSE2"
2330 {
2331 rtx tmp = gen_reg_rtx (V4SImode);
2332
2333 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2334 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
2335 DONE;
2336 })
2337
;; V8HI -> V4SF, high half, unsigned.  Unpacks into a V4SI temporary with
;; gen_vec_unpacku_hi_v8hi, then converts it with gen_sse2_cvtdq2ps.
2338 (define_expand "vec_unpacku_float_hi_v8hi"
2339 [(match_operand:V4SF 0 "register_operand" "")
2340 (match_operand:V8HI 1 "register_operand" "")]
2341 "TARGET_SSE2"
2342 {
2343 rtx tmp = gen_reg_rtx (V4SImode);
2344
2345 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2346 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
2347 DONE;
2348 })
2349
;; V8HI -> V4SF, low half, unsigned.  Unpacks into a V4SI temporary with
;; gen_vec_unpacku_lo_v8hi, then converts it with gen_sse2_cvtdq2ps.
2350 (define_expand "vec_unpacku_float_lo_v8hi"
2351 [(match_operand:V4SF 0 "register_operand" "")
2352 (match_operand:V8HI 1 "register_operand" "")]
2353 "TARGET_SSE2"
2354 {
2355 rtx tmp = gen_reg_rtx (V4SImode);
2356
2357 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2358 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
2359 DONE;
2360 })
2361
;; Convert the high two elements of V4SI operand 1 to V2DF, in two steps.
;; Step 1 copies elements 2,3 of operand 1 into both halves of a fresh V4SI
;; temporary (operand 2).  Step 2 converts elements 0,1 of the temporary
;; with float.
2362 (define_expand "vec_unpacks_float_hi_v4si"
2363 [(set (match_dup 2)
2364 (vec_select:V4SI
2365 (match_operand:V4SI 1 "nonimmediate_operand" "")
2366 (parallel [(const_int 2)
2367 (const_int 3)
2368 (const_int 2)
2369 (const_int 3)])))
2370 (set (match_operand:V2DF 0 "register_operand" "")
2371 (float:V2DF
2372 (vec_select:V2SI
2373 (match_dup 2)
2374 (parallel [(const_int 0) (const_int 1)]))))]
2375 "TARGET_SSE2"
2376 {
2377 operands[2] = gen_reg_rtx (V4SImode);
2378 })
2379
;; Convert the low two elements of V4SI operand 1 to V2DF.  The generated
;; RTL has the same shape as the sse2_cvtdq2pd pattern.
2380 (define_expand "vec_unpacks_float_lo_v4si"
2381 [(set (match_operand:V2DF 0 "register_operand" "")
2382 (float:V2DF
2383 (vec_select:V2SI
2384 (match_operand:V4SI 1 "nonimmediate_operand" "")
2385 (parallel [(const_int 0) (const_int 1)]))))]
2386 "TARGET_SSE2")
2387
;; Pack two V2DF vectors into one V4SF.  Each input is narrowed with
;; gen_sse2_cvtpd2ps into its own V4SF temporary (result in the low half),
;; and the two temporaries are then combined with gen_sse_movlhps.
2388 (define_expand "vec_pack_trunc_v2df"
2389 [(match_operand:V4SF 0 "register_operand" "")
2390 (match_operand:V2DF 1 "nonimmediate_operand" "")
2391 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2392 "TARGET_SSE2"
2393 {
2394 rtx r1, r2;
2395
2396 r1 = gen_reg_rtx (V4SFmode);
2397 r2 = gen_reg_rtx (V4SFmode);
2398
2399 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2400 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2401 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
2402 DONE;
2403 })
2404
;; Pack two V2DF vectors into one V4SI with truncating conversion.  Each
;; input is converted with gen_sse2_cvttpd2dq into a V4SI temporary; the
;; temporaries are then combined with gen_sse2_punpcklqdq, operating on
;; V2DI lowparts of the destination and both temporaries.
2405 (define_expand "vec_pack_sfix_trunc_v2df"
2406 [(match_operand:V4SI 0 "register_operand" "")
2407 (match_operand:V2DF 1 "nonimmediate_operand" "")
2408 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2409 "TARGET_SSE2"
2410 {
2411 rtx r1, r2;
2412
2413 r1 = gen_reg_rtx (V4SImode);
2414 r2 = gen_reg_rtx (V4SImode);
2415
2416 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2417 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2418 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2419 gen_lowpart (V2DImode, r1),
2420 gen_lowpart (V2DImode, r2)));
2421 DONE;
2422 })
2423
;; Pack two V2DF vectors into one V4SI using the non-truncating conversion
;; (gen_sse2_cvtpd2dq).  Same structure as vec_pack_sfix_trunc_v2df: convert
;; each input into a V4SI temporary, then combine with gen_sse2_punpcklqdq
;; on V2DI lowparts.
2424 (define_expand "vec_pack_sfix_v2df"
2425 [(match_operand:V4SI 0 "register_operand" "")
2426 (match_operand:V2DF 1 "nonimmediate_operand" "")
2427 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2428 "TARGET_SSE2"
2429 {
2430 rtx r1, r2;
2431
2432 r1 = gen_reg_rtx (V4SImode);
2433 r2 = gen_reg_rtx (V4SImode);
2434
2435 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2436 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2437 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
2438 gen_lowpart (V2DImode, r1),
2439 gen_lowpart (V2DImode, r2)));
2440 DONE;
2441 })
2442
2443
2444 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2445 ;;
2446 ;; Parallel double-precision floating point element swizzling
2447 ;;
2448 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2449
;; Result = { op1[1], op2[1] } (elements 1 and 3 of the concatenation).
;; Alternatives:
;;   0: register form, op1 tied to the destination (unpckhpd).
;;   1: op1 in offsettable memory, op2 tied; its high half (%H1) is loaded
;;      with movlpd.
;;   2: memory destination tied to op2; op1 is stored with movhpd.
2450 (define_insn "sse2_unpckhpd"
2451 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2452 (vec_select:V2DF
2453 (vec_concat:V4DF
2454 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2455 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2456 (parallel [(const_int 1)
2457 (const_int 3)])))]
2458 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2459 "@
2460 unpckhpd\t{%2, %0|%0, %2}
2461 movlpd\t{%H1, %0|%0, %H1}
2462 movhpd\t{%1, %0|%0, %1}"
2463 [(set_attr "type" "sselog,ssemov,ssemov")
2464 (set_attr "mode" "V2DF,V1DF,V1DF")])
2465
;; Result = { op1[0], op1[0] }.  Alternative 0 emits movddup into a
;; register.  Alternative 1 (register to offsettable memory) emits "#" and
;; is handled by the define_split that follows.
2466 (define_insn "*sse3_movddup"
2467 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2468 (vec_select:V2DF
2469 (vec_concat:V4DF
2470 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2471 (match_dup 1))
2472 (parallel [(const_int 0)
2473 (const_int 2)])))]
2474 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2475 "@
2476 movddup\t{%1, %0|%0, %1}
2477 #"
2478 [(set_attr "type" "sselog1,ssemov")
2479 (set_attr "mode" "V2DF")])
2480
;; After reload, split the duplicate-low-element store to memory into two
;; DFmode stores of the source register's DF view: one at byte offset 0 and
;; one at byte offset 8 of the destination.
2481 (define_split
2482 [(set (match_operand:V2DF 0 "memory_operand" "")
2483 (vec_select:V2DF
2484 (vec_concat:V4DF
2485 (match_operand:V2DF 1 "register_operand" "")
2486 (match_dup 1))
2487 (parallel [(const_int 0)
2488 (const_int 2)])))]
2489 "TARGET_SSE3 && reload_completed"
2490 [(const_int 0)]
2491 {
2492 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2493 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2494 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
2495 DONE;
2496 })
2497
;; Result = { op1[0], op2[0] } (elements 0 and 2 of the concatenation).
;; Operand 1 is tied to the destination in every alternative:
;;   0: register source (unpcklpd).
;;   1: memory source, loaded with movhpd.
;;   2: offsettable memory destination; op2 is stored to its high half
;;      (%H0) with movlpd.
2498 (define_insn "sse2_unpcklpd"
2499 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2500 (vec_select:V2DF
2501 (vec_concat:V4DF
2502 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2503 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2504 (parallel [(const_int 0)
2505 (const_int 2)])))]
2506 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2507 "@
2508 unpcklpd\t{%2, %0|%0, %2}
2509 movhpd\t{%2, %0|%0, %2}
2510 movlpd\t{%2, %H0|%H0, %2}"
2511 [(set_attr "type" "sselog,ssemov,ssemov")
2512 (set_attr "mode" "V2DF,V1DF,V1DF")])
2513
;; Expander taking the shufpd immediate as operand 3.  It decodes the mask
;; into the two element indices used by sse2_shufpd_1: bit 0 gives index 0
;; or 1 (within op1), bit 1 gives index 2 or 3 (within op2 in the concat).
2514 (define_expand "sse2_shufpd"
2515 [(match_operand:V2DF 0 "register_operand" "")
2516 (match_operand:V2DF 1 "register_operand" "")
2517 (match_operand:V2DF 2 "nonimmediate_operand" "")
2518 (match_operand:SI 3 "const_int_operand" "")]
2519 "TARGET_SSE2"
2520 {
2521 int mask = INTVAL (operands[3]);
2522 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2523 GEN_INT (mask & 1),
2524 GEN_INT (mask & 2 ? 3 : 2)));
2525 DONE;
2526 })
2527
;; shufpd as an explicit vec_select of the op1/op2 concatenation.
;; Operand 3 (0..1) picks result element 0 and operand 4 (2..3) picks result
;; element 1.  The output code rebuilds the 2-bit immediate from those two
;; indices and overwrites operands[3] with it for printing.
2528 (define_insn "sse2_shufpd_1"
2529 [(set (match_operand:V2DF 0 "register_operand" "=x")
2530 (vec_select:V2DF
2531 (vec_concat:V4DF
2532 (match_operand:V2DF 1 "register_operand" "0")
2533 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2534 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2535 (match_operand 4 "const_2_to_3_operand" "")])))]
2536 "TARGET_SSE2"
2537 {
2538 int mask;
2539 mask = INTVAL (operands[3]);
2540 mask |= (INTVAL (operands[4]) - 2) << 1;
2541 operands[3] = GEN_INT (mask);
2542
2543 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2544 }
2545 [(set_attr "type" "sselog")
2546 (set_attr "mode" "V2DF")])
2547
;; Extract element 1 of a V2DF operand as a DF value.
;;   0: store to memory with movhpd.
;;   1: source tied to the destination register (unpckhpd %0, %0).
;;   2: source in offsettable memory; emits "#" and is split after reload.
2548 (define_insn "sse2_storehpd"
2549 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2550 (vec_select:DF
2551 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2552 (parallel [(const_int 1)])))]
2553 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2554 "@
2555 movhpd\t{%1, %0|%0, %1}
2556 unpckhpd\t%0, %0
2557 #"
2558 [(set_attr "type" "ssemov,sselog1,ssemov")
2559 (set_attr "mode" "V1DF,V2DF,DF")])
2560
;; After reload, extracting element 1 from a V2DF memory operand becomes a
;; plain DFmode move from byte offset 8 of that memory.
2561 (define_split
2562 [(set (match_operand:DF 0 "register_operand" "")
2563 (vec_select:DF
2564 (match_operand:V2DF 1 "memory_operand" "")
2565 (parallel [(const_int 1)])))]
2566 "TARGET_SSE2 && reload_completed"
2567 [(set (match_dup 0) (match_dup 1))]
2568 {
2569 operands[1] = adjust_address (operands[1], DFmode, 8);
2570 })
2571
;; Extract element 0 of a V2DF operand as a DF value.  Alternative 0 stores
;; to memory with movlpd.  Alternatives 1 and 2 (register destination) emit
;; "#" and are split after reload.
2572 (define_insn "sse2_storelpd"
2573 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2574 (vec_select:DF
2575 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2576 (parallel [(const_int 0)])))]
2577 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2578 "@
2579 movlpd\t{%1, %0|%0, %1}
2580 #
2581 #"
2582 [(set_attr "type" "ssemov")
2583 (set_attr "mode" "V1DF,DF,DF")])
2584
;; After reload, extracting element 0 into a register becomes a plain
;; DFmode move.  A register source is re-expressed as a DFmode REG with the
;; same register number; any other source goes through gen_lowpart.
2585 (define_split
2586 [(set (match_operand:DF 0 "register_operand" "")
2587 (vec_select:DF
2588 (match_operand:V2DF 1 "nonimmediate_operand" "")
2589 (parallel [(const_int 0)])))]
2590 "TARGET_SSE2 && reload_completed"
2591 [(const_int 0)]
2592 {
2593 rtx op1 = operands[1];
2594 if (REG_P (op1))
2595 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2596 else
2597 op1 = gen_lowpart (DFmode, op1);
2598 emit_move_insn (operands[0], op1);
2599 DONE;
2600 })
2601
;; Result = { op1[0], op2 }: keep element 0 of operand 1 and replace
;; element 1 with DF operand 2.  Operand 1 is tied to the destination in
;; every alternative:
;;   0: op2 in memory, loaded with movhpd.
;;   1: op2 in a register (unpcklpd).
;;   2: offsettable memory destination; emits "#" and is split after reload.
;; The former alternative with op2 tied to the destination, which emitted
;; "shufpd $1, %1, %0", has been removed.  With the destination holding the
;; scalar op2, that instruction puts the destination's (undefined) high half
;; in element 0 and op1[0] in element 1, the reverse of this pattern, and no
;; single shufpd can build the right result from that register assignment.
2602 (define_insn "sse2_loadhpd"
2603 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2604 (vec_concat:V2DF
2605 (vec_select:DF
2606 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2607 (parallel [(const_int 0)]))
2608 (match_operand:DF 2 "nonimmediate_operand" " m,x,x*fr")))]
2609 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2610 "@
2611 movhpd\t{%2, %0|%0, %2}
2612 unpcklpd\t{%2, %0|%0, %2}
2614 #"
2615 [(set_attr "type" "ssemov,sselog,other")
2616 (set_attr "mode" "V1DF,V2DF,DF")])
2617
;; After reload, replacing element 1 of a V2DF memory operand with a DF
;; register becomes a plain DFmode store at byte offset 8 of that memory.
2618 (define_split
2619 [(set (match_operand:V2DF 0 "memory_operand" "")
2620 (vec_concat:V2DF
2621 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2622 (match_operand:DF 1 "register_operand" "")))]
2623 "TARGET_SSE2 && reload_completed"
2624 [(set (match_dup 0) (match_dup 1))]
2625 {
2626 operands[0] = adjust_address (operands[0], DFmode, 8);
2627 })
2628
;; Result = { op2, op1[1] }: replace element 0 with DF operand 2 and keep
;; element 1 of operand 1.  Alternatives:
;;   0: op2 in memory, op1 matching "C" (movsd load).
;;   1: op2 in memory, op1 tied to the destination (movlpd).
;;   2: op2 in a register, op1 tied to the destination (movsd).
;;   3: op2 tied to the destination, op1 in another register.  shufpd $2
;;      keeps the destination's element 0 and takes element 1 from its
;;      source, so the source must be operand 1.  It was previously printed
;;      as %2, which is the destination itself and made the insn a no-op.
;;   4: op2 tied to the destination, op1 in offsettable memory; its high
;;      half (%H1) is loaded with movhpd.
;;   5: memory destination tied to op1; emits "#" and is split after reload.
2629 (define_insn "sse2_loadlpd"
2630 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2631 (vec_concat:V2DF
2632 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2633 (vec_select:DF
2634 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2635 (parallel [(const_int 1)]))))]
2636 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2637 "@
2638 movsd\t{%2, %0|%0, %2}
2639 movlpd\t{%2, %0|%0, %2}
2640 movsd\t{%2, %0|%0, %2}
2641 shufpd\t{$2, %1, %0|%0, %1, 2}
2642 movhpd\t{%H1, %0|%0, %H1}
2643 #"
2644 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2645 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
2646
;; After reload, replacing element 0 of a V2DF memory operand with a DF
;; register becomes a plain DFmode store.  Element 0 is the first
;; vec_concat operand and lives at byte offset 0 of the vector.  The offset
;; used here was previously 8, which overwrote element 1 (the element this
;; pattern is supposed to preserve) and left element 0 unchanged.
2647 (define_split
2648 [(set (match_operand:V2DF 0 "memory_operand" "")
2649 (vec_concat:V2DF
2650 (match_operand:DF 1 "register_operand" "")
2651 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2652 "TARGET_SSE2 && reload_completed"
2653 [(set (match_dup 0) (match_dup 1))]
2654 {
2655 operands[0] = adjust_address (operands[0], DFmode, 0);
2656 })
2657
2658 ;; Not sure these two are ever used, but it doesn't hurt to have
2659 ;; them. -aoliva
;; Extract element 1 of a V2DF operand when only SSE1 is available
;; (!TARGET_SSE2 && TARGET_SSE), using single-precision move instructions:
;;   0: store to memory (movhps).
;;   1: register to register (movhlps).
;;   2: load the high half (%H1) of an offsettable memory operand (movlps).
2660 (define_insn "*vec_extractv2df_1_sse"
2661 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2662 (vec_select:DF
2663 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2664 (parallel [(const_int 1)])))]
2665 "!TARGET_SSE2 && TARGET_SSE
2666 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2667 "@
2668 movhps\t{%1, %0|%0, %1}
2669 movhlps\t{%1, %0|%0, %1}
2670 movlps\t{%H1, %0|%0, %H1}"
2671 [(set_attr "type" "ssemov")
2672 (set_attr "mode" "V2SF,V4SF,V2SF")])
2673
;; Extract element 0 of a V2DF operand when only SSE1 is available
;; (!TARGET_SSE2 && TARGET_SSE):
;;   0: store to memory (movlps).
;;   1: register to register (movaps).
;;   2: load from memory (movlps).
2674 (define_insn "*vec_extractv2df_0_sse"
2675 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2676 (vec_select:DF
2677 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2678 (parallel [(const_int 0)])))]
2679 "!TARGET_SSE2 && TARGET_SSE
2680 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2681 "@
2682 movlps\t{%1, %0|%0, %1}
2683 movaps\t{%1, %0|%0, %1}
2684 movlps\t{%1, %0|%0, %1}"
2685 [(set_attr "type" "ssemov")
2686 (set_attr "mode" "V2SF,V4SF,V2SF")])
2687
;; Result = { op2[0], op1[1] } (vec_merge mask 1: element 0 from operand 2,
;; element 1 from operand 1).  Alternatives:
;;   0: op1 tied to the destination, op2 in a register (movsd).
;;   1: op1 tied to the destination, op2 in memory (movlpd load).
;;   2: memory destination tied to op1, op2 in a register (movlpd store).
;;   3: op2 tied to the destination, op1 in another register.  shufpd $2
;;      keeps the destination's element 0 and takes element 1 from its
;;      source, so the source must be operand 1.  It was previously printed
;;      as %2, which is the destination itself and made the insn a no-op.
;;   4: op2 tied to the destination, op1 in offsettable memory; its high
;;      half (%H1) is loaded with movhps.
;;   5: offsettable memory destination tied to op2; op1 is stored to its
;;      high half (%H0) with movhps.
2688 (define_insn "sse2_movsd"
2689 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2690 (vec_merge:V2DF
2691 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2692 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2693 (const_int 1)))]
2694 "TARGET_SSE2"
2695 "@
2696 movsd\t{%2, %0|%0, %2}
2697 movlpd\t{%2, %0|%0, %2}
2698 movlpd\t{%2, %0|%0, %2}
2699 shufpd\t{$2, %1, %0|%0, %1, 2}
2700 movhps\t{%H1, %0|%0, %H1}
2701 movhps\t{%1, %H0|%H0, %1}"
2702 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2703 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
2704
;; Broadcast a DF operand (register or memory) into both elements of a
;; V2DF register using movddup; requires TARGET_SSE3.
2705 (define_insn "*vec_dupv2df_sse3"
2706 [(set (match_operand:V2DF 0 "register_operand" "=x")
2707 (vec_duplicate:V2DF
2708 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2709 "TARGET_SSE3"
2710 "movddup\t{%1, %0|%0, %1}"
2711 [(set_attr "type" "sselog1")
2712 (set_attr "mode" "DF")])
2713
;; SSE2 broadcast of a DF value already held in the destination register
;; (operand 1 tied to operand 0), emitted as unpcklpd %0, %0.
2714 (define_insn "*vec_dupv2df"
2715 [(set (match_operand:V2DF 0 "register_operand" "=x")
2716 (vec_duplicate:V2DF
2717 (match_operand:DF 1 "register_operand" "0")))]
2718 "TARGET_SSE2"
2719 "unpcklpd\t%0, %0"
2720 [(set_attr "type" "sselog1")
2721 (set_attr "mode" "V2DF")])
2722
;; vec_concat of a DF operand with itself, i.e. a broadcast written as a
;; concatenation; emitted as movddup under TARGET_SSE3.
2723 (define_insn "*vec_concatv2df_sse3"
2724 [(set (match_operand:V2DF 0 "register_operand" "=x")
2725 (vec_concat:V2DF
2726 (match_operand:DF 1 "nonimmediate_operand" "xm")
2727 (match_dup 1)))]
2728 "TARGET_SSE3"
2729 "movddup\t{%1, %0|%0, %1}"
2730 [(set_attr "type" "sselog1")
2731 (set_attr "mode" "DF")])
2732
;; General V2DF = { op1, op2 } concatenation of two DF values.  The five
;; alternatives emit, in order: unpcklpd, movhpd, movsd (op1 loaded from
;; memory, op2 matching "C"), movlhps and movhps.  The first three use the
;; "Yt" register constraint and the last two use "x".
2733 (define_insn "*vec_concatv2df"
2734 [(set (match_operand:V2DF 0 "register_operand" "=Yt,Yt,Yt,x,x")
2735 (vec_concat:V2DF
2736 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
2737 (match_operand:DF 2 "vector_move_operand" " Yt,m ,C ,x,m")))]
2738 "TARGET_SSE"
2739 "@
2740 unpcklpd\t{%2, %0|%0, %2}
2741 movhpd\t{%2, %0|%0, %2}
2742 movsd\t{%1, %0|%0, %1}
2743 movlhps\t{%2, %0|%0, %2}
2744 movhps\t{%2, %0|%0, %2}"
2745 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2746 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
2747
;; Set one element of V2DF operand 0 to DF operand 1; operand 2 is the
;; constant element index.  Delegates to ix86_expand_vector_set.
2748 (define_expand "vec_setv2df"
2749 [(match_operand:V2DF 0 "register_operand" "")
2750 (match_operand:DF 1 "register_operand" "")
2751 (match_operand 2 "const_int_operand" "")]
2752 "TARGET_SSE"
2753 {
2754 ix86_expand_vector_set (false, operands[0], operands[1],
2755 INTVAL (operands[2]));
2756 DONE;
2757 })
2758
;; Extract one element of V2DF operand 1 into DF operand 0; operand 2 is
;; the constant element index.  Delegates to ix86_expand_vector_extract.
2759 (define_expand "vec_extractv2df"
2760 [(match_operand:DF 0 "register_operand" "")
2761 (match_operand:V2DF 1 "register_operand" "")
2762 (match_operand 2 "const_int_operand" "")]
2763 "TARGET_SSE"
2764 {
2765 ix86_expand_vector_extract (false, operands[0], operands[1],
2766 INTVAL (operands[2]));
2767 DONE;
2768 })
2769
;; Initialise V2DF operand 0 from operand 1 (no predicate or mode is given
;; for it here).  Delegates to ix86_expand_vector_init.
2770 (define_expand "vec_initv2df"
2771 [(match_operand:V2DF 0 "register_operand" "")
2772 (match_operand 1 "" "")]
2773 "TARGET_SSE"
2774 {
2775 ix86_expand_vector_init (false, operands[0], operands[1]);
2776 DONE;
2777 })
2778
2779 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2780 ;;
2781 ;; Parallel integral arithmetic
2782 ;;
2783 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2784
;; Integer vector negation for every SSEMODEI mode, expanded as 0 - op1.
;; Operand 2 is the zero vector constant forced into a register.
2785 (define_expand "neg<mode>2"
2786 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2787 (minus:SSEMODEI
2788 (match_dup 2)
2789 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2790 "TARGET_SSE2"
2791 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
2792
;; Integer vector addition.  The operands are legitimized by
;; ix86_fixup_binary_operands_no_copy before the PLUS pattern is emitted.
(define_expand "add<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (plus:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
})
2799
;; padd{b,w,d,q}: element-wise integer add.  Operand 1 is tied to the
;; destination and marked commutative ("%"); operand 2 may be a register
;; or memory.
(define_insn "*add<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
                       (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
  "padd<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2810
;; padds{b,w}: ss_plus (signed saturating add) on byte and word vectors.
(define_insn "sse2_ssadd<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (ss_plus:SSEMODE12 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
                           (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
  "padds<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2821
;; paddus{b,w}: us_plus (unsigned saturating add) on byte and word vectors.
(define_insn "sse2_usadd<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (us_plus:SSEMODE12 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
                           (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
  "paddus<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2832
;; Integer vector subtraction.  Operand 1 must already be a register;
;; the operands are passed through ix86_fixup_binary_operands_no_copy
;; before the MINUS pattern is emitted.
(define_expand "sub<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (minus:SSEMODEI
          (match_operand:SSEMODEI 1 "register_operand" "")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);
})
2839
;; psub{b,w,d,q}: element-wise integer subtract.  Not commutative, so
;; operand 1 is a register tied to the destination.
(define_insn "*sub<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0")
                        (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psub<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2850
;; psubs{b,w}: ss_minus (signed saturating subtract) on byte and word
;; vectors.
(define_insn "sse2_sssub<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (ss_minus:SSEMODE12 (match_operand:SSEMODE12 1 "register_operand" "0")
                            (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psubs<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2861
;; psubus{b,w}: us_minus (unsigned saturating subtract) on byte and word
;; vectors.
(define_insn "sse2_ussub<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (us_minus:SSEMODE12 (match_operand:SSEMODE12 1 "register_operand" "0")
                            (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psubus<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2872
;; V16QI multiply.  No byte multiply pattern is used here; instead the
;; bytes are widened into words, multiplied with mulv8hi3, and the low
;; byte of every word product is gathered back with a punpck cascade.
(define_expand "mulv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
                    (match_operand:V16QI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx tmp[12];
  int k = 0;

  while (k < 12)
    tmp[k++] = gen_reg_rtx (V16QImode);

  /* Place each source byte in the low byte of a word.  The high byte of
     the word is a don't-care, so the cheapest thing is to interleave
     each input with itself, which leaves a copy of the byte there.  */
  emit_insn (gen_sse2_punpckhbw (tmp[0], operands[1], operands[1]));
  emit_insn (gen_sse2_punpckhbw (tmp[1], operands[2], operands[2]));
  emit_insn (gen_sse2_punpcklbw (tmp[2], operands[1], operands[1]));
  emit_insn (gen_sse2_punpcklbw (tmp[3], operands[2], operands[2]));

  /* Word multiplies.  In the pictures on the right a dot is a
     don't-care byte and the letters are result bytes, A being the most
     significant.  */
  emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, tmp[4]),    /* .A.B.C.D.E.F.G.H */
                           gen_lowpart (V8HImode, tmp[0]),
                           gen_lowpart (V8HImode, tmp[1])));
  emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, tmp[5]),    /* .I.J.K.L.M.N.O.P */
                           gen_lowpart (V8HImode, tmp[2]),
                           gen_lowpart (V8HImode, tmp[3])));

  /* Pick out the wanted bytes and merge them step by step.  */
  emit_insn (gen_sse2_punpckhbw (tmp[6], tmp[5], tmp[4]));    /* ..AI..BJ..CK..DL */
  emit_insn (gen_sse2_punpcklbw (tmp[7], tmp[5], tmp[4]));    /* ..EM..FN..GO..HP */
  emit_insn (gen_sse2_punpckhbw (tmp[8], tmp[7], tmp[6]));    /* ....AEIM....BFJN */
  emit_insn (gen_sse2_punpcklbw (tmp[9], tmp[7], tmp[6]));    /* ....CGKO....DHLP */
  emit_insn (gen_sse2_punpckhbw (tmp[10], tmp[9], tmp[8]));   /* ........ACEGIKMO */
  emit_insn (gen_sse2_punpcklbw (tmp[11], tmp[9], tmp[8]));   /* ........BDFHJLNP */

  /* Final interleave writes the destination.  */
  emit_insn (gen_sse2_punpcklbw (operands[0], tmp[11], tmp[10])); /* ABCDEFGHIJKLMNOP */
  DONE;
})
2916
;; V8HI multiply (low halves of the products).  Operands are legitimized
;; by ix86_fixup_binary_operands_no_copy before the MULT pattern is emitted.
(define_expand "mulv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (mult:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "")
          (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);
})
2923
;; pmullw: element-wise V8HI multiply.  Operand 1 is commutative and
;; tied to the destination.
(define_insn "*mulv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (mult:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "%0")
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmullw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2933
;; Signed V8HI high-part multiply: sign-extend both inputs to V8SI,
;; multiply, shift right by 16 and truncate back to V8HI.
(define_expand "smulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (sign_extend:V8SI (match_operand:V8HI 1 "nonimmediate_operand" ""))
              (sign_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "")))
            (const_int 16))))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);
})
2946
;; pmulhw: high 16 bits of each signed 16x16 product.
(define_insn "*smulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (sign_extend:V8SI (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
              (sign_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
            (const_int 16))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2962
;; Unsigned V8HI high-part multiply: zero-extend both inputs to V8SI,
;; multiply, shift right by 16 and truncate back to V8HI.
(define_expand "umulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (zero_extend:V8SI (match_operand:V8HI 1 "nonimmediate_operand" ""))
              (zero_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "")))
            (const_int 16))))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);
})
2975
;; pmulhuw: high 16 bits of each unsigned 16x16 product.
(define_insn "*umulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (zero_extend:V8SI (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
              (zero_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
            (const_int 16))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhuw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2991
;; pmuludq: unsigned widening multiply.  Elements 0 and 2 of each V4SI
;; input are zero-extended to DI and multiplied, giving a V2DI result.
(define_insn "sse2_umulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (mult:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
                             (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                             (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "pmuludq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3008
;; pmuldq: signed widening multiply.  Elements 0 and 2 of each V4SI
;; input are sign-extended to DI and multiplied, giving a V2DI result.
(define_insn "sse4_1_mulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
                             (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                             (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "pmuldq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
3025
;; pmaddwd: multiply-and-add of signed words.  The even-indexed words
;; (0,2,4,6) of both inputs are sign-extended and multiplied, likewise
;; the odd-indexed words (1,3,5,7), and the two V4SI product vectors are
;; added together.
(define_insn "sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          ;; Products of the even-indexed words.
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)]))))
          ;; Products of the odd-indexed words.
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmaddwd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3062
;; V4SI multiply.  With SSE4.1 the MULT pattern itself is emitted (after
;; operand fixup).  Otherwise the product is synthesized from two
;; pmuludq multiplies, one for elements 0/2 and one for elements 1/3.
(define_expand "mulv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      rtx dst = operands[0];
      rtx src1 = operands[1];
      rtx src2 = operands[2];
      rtx even_prod = gen_reg_rtx (V4SImode);
      rtx src1_odd = gen_reg_rtx (V4SImode);
      rtx src2_odd = gen_reg_rtx (V4SImode);
      rtx odd_prod = gen_reg_rtx (V4SImode);
      rtx even_packed = gen_reg_rtx (V4SImode);
      rtx odd_packed = gen_reg_rtx (V4SImode);
      rtx shift32 = GEN_INT (32);

      /* Products of elements 2 and 0.  */
      emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, even_prod),
                                         src1, src2));

      /* Shift both whole inputs right by one element so that elements
         3 and 1 land in the slots of elements 2 and 0.  On K8, at
         least, this beats a shuffle.  */
      emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src1_odd),
                                   gen_lowpart (TImode, src1),
                                   shift32));
      emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src2_odd),
                                   gen_lowpart (TImode, src2),
                                   shift32));

      /* Products of elements 3 and 1.  */
      emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, odd_prod),
                                         src1_odd, src2_odd));

      /* Bring the result sitting in element 2 down into element 1;
         the contents of elements 2 and 3 afterwards do not matter.  */
      emit_insn (gen_sse2_pshufd_1 (even_packed, even_prod,
                                    const0_rtx, const2_rtx,
                                    const0_rtx, const0_rtx));
      emit_insn (gen_sse2_pshufd_1 (odd_packed, odd_prod,
                                    const0_rtx, const2_rtx,
                                    const0_rtx, const0_rtx));

      /* Interleave the even and odd results into the destination.  */
      emit_insn (gen_sse2_punpckldq (dst, even_packed, odd_packed));
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
})
3116
;; pmulld: element-wise V4SI multiply (SSE4.1).
(define_insn "*sse4_1_mulv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (mult:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "%0")
          (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "pmulld\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
3126
;; V2DI multiply, synthesized from three pmuludq multiplies:
;;   lo(a)*lo(b) + ((lo(a)*hi(b)) << 32) + ((lo(b)*hi(a)) << 32)
(define_expand "mulv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx dst = operands[0];
  rtx a = operands[1];
  rtx b = operands[2];
  rtx lo_lo = gen_reg_rtx (V2DImode);
  rtx a_hi = gen_reg_rtx (V2DImode);
  rtx b_hi = gen_reg_rtx (V2DImode);
  rtx a_lo_b_hi = gen_reg_rtx (V2DImode);
  rtx b_lo_a_hi = gen_reg_rtx (V2DImode);
  rtx partial = gen_reg_rtx (V2DImode);
  rtx shift32 = GEN_INT (32);

  /* Low half times low half.  */
  emit_insn (gen_sse2_umulv2siv2di3 (lo_lo, gen_lowpart (V4SImode, a),
                                     gen_lowpart (V4SImode, b)));

  /* Shift each input right by 32 bits so its high half sits where
     pmuludq reads its multiplicand.  */
  emit_insn (gen_lshrv2di3 (a_hi, a, shift32));
  emit_insn (gen_lshrv2di3 (b_hi, b, shift32));

  /* The two cross products: low half of one input times high half of
     the other.  */
  emit_insn (gen_sse2_umulv2siv2di3 (a_lo_b_hi, gen_lowpart (V4SImode, a),
                                     gen_lowpart (V4SImode, b_hi)));
  emit_insn (gen_sse2_umulv2siv2di3 (b_lo_a_hi, gen_lowpart (V4SImode, b),
                                     gen_lowpart (V4SImode, a_hi)));

  /* Move the cross products up into the high half.  */
  emit_insn (gen_ashlv2di3 (a_lo_b_hi, a_lo_b_hi, shift32));
  emit_insn (gen_ashlv2di3 (b_lo_a_hi, b_lo_a_hi, shift32));

  /* Sum the three partial products.  */
  emit_insn (gen_addv2di3 (partial, lo_lo, a_lo_b_hi));
  emit_insn (gen_addv2di3 (dst, partial, b_lo_a_hi));
  DONE;
})
3170
;; Signed widening multiply, high variant: compute the low and high
;; 16-bit halves of every V8HI product separately, then interleave the
;; high parts of the two result vectors into V4SI operand 0.
(define_expand "vec_widen_smult_hi_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo = gen_reg_rtx (V8HImode);
  rtx prod_hi = gen_reg_rtx (V8HImode);
  rtx dest_v8hi = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (prod_lo, operands[1], operands[2]));
  emit_insn (gen_smulv8hi3_highpart (prod_hi, operands[1], operands[2]));
  emit_insn (gen_vec_interleave_highv8hi (dest_v8hi, prod_lo, prod_hi));
  DONE;
})
3190
;; Signed widening multiply, low variant: compute the low and high
;; 16-bit halves of every V8HI product separately, then interleave the
;; low parts of the two result vectors into V4SI operand 0.
(define_expand "vec_widen_smult_lo_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo = gen_reg_rtx (V8HImode);
  rtx prod_hi = gen_reg_rtx (V8HImode);
  rtx dest_v8hi = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (prod_lo, operands[1], operands[2]));
  emit_insn (gen_smulv8hi3_highpart (prod_hi, operands[1], operands[2]));
  emit_insn (gen_vec_interleave_lowv8hi (dest_v8hi, prod_lo, prod_hi));
  DONE;
})
3210
;; Unsigned widening multiply, high variant: compute the low and high
;; 16-bit halves of every V8HI product separately, then interleave the
;; high parts of the two result vectors into V4SI operand 0.
(define_expand "vec_widen_umult_hi_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo = gen_reg_rtx (V8HImode);
  rtx prod_hi = gen_reg_rtx (V8HImode);
  rtx dest_v8hi = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (prod_lo, operands[1], operands[2]));
  emit_insn (gen_umulv8hi3_highpart (prod_hi, operands[1], operands[2]));
  emit_insn (gen_vec_interleave_highv8hi (dest_v8hi, prod_lo, prod_hi));
  DONE;
})
3230
;; Unsigned widening multiply, low variant: compute the low and high
;; 16-bit halves of every V8HI product separately, then interleave the
;; low parts of the two result vectors into V4SI operand 0.
(define_expand "vec_widen_umult_lo_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo = gen_reg_rtx (V8HImode);
  rtx prod_hi = gen_reg_rtx (V8HImode);
  rtx dest_v8hi = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (prod_lo, operands[1], operands[2]));
  emit_insn (gen_umulv8hi3_highpart (prod_hi, operands[1], operands[2]));
  emit_insn (gen_vec_interleave_lowv8hi (dest_v8hi, prod_lo, prod_hi));
  DONE;
})
3250
;; Signed widening multiply of the high two V4SI elements into V2DI.
;;
;; Each input is interleaved with itself so that its high elements 2 and
;; 3 land in slots 0 and 2, which are the slots the widening multiply
;; reads.  The multiply must sign-extend its inputs, so the SSE4.1
;; pmuldq pattern (sse4_1_mulv2siv2di3) is used.  The unsigned pmuludq
;; pattern zero-extends and yields a wrong product whenever an element
;; is negative, hence this expander is only enabled for TARGET_SSE4_1.
(define_expand "vec_widen_smult_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE4_1"
{
  rtx op1, op2, t1, t2;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);

  /* Duplicate the high elements into the even slots.  */
  emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
  emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));

  /* Sign-extending 32x32->64 multiply of slots 0 and 2.  */
  emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
  DONE;
})
3269
;; Signed widening multiply of the low two V4SI elements into V2DI.
;;
;; Each input is interleaved with itself so that its low elements 0 and
;; 1 land in slots 0 and 2, which are the slots the widening multiply
;; reads.  The multiply must sign-extend its inputs, so the SSE4.1
;; pmuldq pattern (sse4_1_mulv2siv2di3) is used.  The unsigned pmuludq
;; pattern zero-extends and yields a wrong product whenever an element
;; is negative, hence this expander is only enabled for TARGET_SSE4_1.
(define_expand "vec_widen_smult_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE4_1"
{
  rtx op1, op2, t1, t2;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);

  /* Duplicate the low elements into the even slots.  */
  emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
  emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));

  /* Sign-extending 32x32->64 multiply of slots 0 and 2.  */
  emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
  DONE;
})
3288
;; Unsigned widening multiply of the high two V4SI elements into V2DI.
;; Each input is interleaved with itself so that its high elements fall
;; into slots 0 and 2, which are the slots pmuludq multiplies.
(define_expand "vec_widen_umult_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx dup1 = gen_reg_rtx (V4SImode);
  rtx dup2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_interleave_highv4si (dup1, operands[1], operands[1]));
  emit_insn (gen_vec_interleave_highv4si (dup2, operands[2], operands[2]));
  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], dup1, dup2));
  DONE;
})
3307
;; Unsigned widening multiply of the low two V4SI elements into V2DI.
;; Each input is interleaved with itself so that its low elements fall
;; into slots 0 and 2, which are the slots pmuludq multiplies.
(define_expand "vec_widen_umult_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx dup1 = gen_reg_rtx (V4SImode);
  rtx dup2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_interleave_lowv4si (dup1, operands[1], operands[1]));
  emit_insn (gen_vec_interleave_lowv4si (dup2, operands[2], operands[2]));
  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], dup1, dup2));
  DONE;
})
3326
;; Signed dot product step: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2).
(define_expand "sdot_prodv8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")
   (match_operand:V4SI 3 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx pair_sums = gen_reg_rtx (V4SImode);

  /* Multiply the words and add adjacent pairs of products.  */
  emit_insn (gen_sse2_pmaddwd (pair_sums, operands[1], operands[2]));

  /* Fold in the incoming accumulator.  */
  emit_insn (gen_addv4si3 (operands[0], operands[3], pair_sums));
  DONE;
})
3339
;; Unsigned dot product step for V4SI inputs and a V2DI accumulator:
;; the widened products of elements 0/2 and of elements 1/3 are both
;; added to operand 3.
(define_expand "udot_prodv4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")
   (match_operand:V2DI 3 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx even_acc, src1_odd, src2_odd, odd_prod;

  /* Products of elements 0 and 2, plus the incoming accumulator.  */
  even_acc = gen_reg_rtx (V2DImode);
  emit_insn (gen_sse2_umulv2siv2di3 (even_acc, operands[1], operands[2]));
  emit_insn (gen_addv2di3 (even_acc, even_acc, operands[3]));

  /* Shift the whole 128-bit inputs right by 32 bits so that elements
     1 and 3 move into the slots pmuludq reads.  */
  src1_odd = gen_reg_rtx (V4SImode);
  src2_odd = gen_reg_rtx (V4SImode);
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src1_odd),
                               gen_lowpart (TImode, operands[1]),
                               GEN_INT (32)));
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src2_odd),
                               gen_lowpart (TImode, operands[2]),
                               GEN_INT (32)));

  /* Products of what were elements 1 and 3.  */
  odd_prod = gen_reg_rtx (V2DImode);
  emit_insn (gen_sse2_umulv2siv2di3 (odd_prod, src1_odd, src2_odd));

  emit_insn (gen_addv2di3 (operands[0], even_acc, odd_prod));
  DONE;
})
3368
;; psra{w,d}: arithmetic right shift of every element by the count in
;; operand 2 (an SSE register or an integer constant).
(define_insn "ashr<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
        (ashiftrt:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "0")
                            (match_operand:TI 2 "nonmemory_operand" "xn")))]
  "TARGET_SSE2"
  "psra<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3379
;; psrl{w,d,q}: logical right shift of every element by the count in
;; operand 2 (an SSE register or an integer constant).
(define_insn "lshr<mode>3"
  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
        (lshiftrt:SSEMODE248 (match_operand:SSEMODE248 1 "register_operand" "0")
                             (match_operand:TI 2 "nonmemory_operand" "xn")))]
  "TARGET_SSE2"
  "psrl<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3390
;; psll{w,d,q}: left shift of every element by the count in operand 2
;; (an SSE register or an integer constant).
(define_insn "ashl<mode>3"
  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
        (ashift:SSEMODE248 (match_operand:SSEMODE248 1 "register_operand" "0")
                           (match_operand:TI 2 "nonmemory_operand" "xn")))]
  "TARGET_SSE2"
  "psll<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3401
;; Whole-vector left shift.  The vector operands are reinterpreted as
;; TImode so that the emitted RTL is a single 128-bit ashift.  The
;; expansion FAILs unless the shift count satisfies
;; const_0_to_255_mul_8_operand.
(define_expand "vec_shl_<mode>"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (ashift:TI
          (match_operand:SSEMODEI 1 "register_operand" "")
          (match_operand:SI 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  int k;

  if (!const_0_to_255_mul_8_operand (operands[2], SImode))
    FAIL;

  /* View destination and source as TImode values.  */
  for (k = 0; k < 2; k++)
    operands[k] = gen_lowpart (TImode, operands[k]);
})
3413
;; Whole-vector logical right shift.  The vector operands are
;; reinterpreted as TImode so that the emitted RTL is a single 128-bit
;; lshiftrt.  The expansion FAILs unless the shift count satisfies
;; const_0_to_255_mul_8_operand.
(define_expand "vec_shr_<mode>"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (lshiftrt:TI
          (match_operand:SSEMODEI 1 "register_operand" "")
          (match_operand:SI 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  int k;

  if (!const_0_to_255_mul_8_operand (operands[2], SImode))
    FAIL;

  /* View destination and source as TImode values.  */
  for (k = 0; k < 2; k++)
    operands[k] = gen_lowpart (TImode, operands[k]);
})
3425
;; Unsigned byte maximum.  Operands are legitimized by
;; ix86_fixup_binary_operands_no_copy before the UMAX pattern is emitted.
(define_expand "umaxv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (umax:V16QI
          (match_operand:V16QI 1 "nonimmediate_operand" "")
          (match_operand:V16QI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);
})
3432
;; pmaxub: element-wise unsigned byte maximum.
(define_insn "*umaxv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (umax:V16QI
          (match_operand:V16QI 1 "nonimmediate_operand" "%0")
          (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
  "pmaxub\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3442
;; Signed word maximum.  Operands are legitimized by
;; ix86_fixup_binary_operands_no_copy before the SMAX pattern is emitted.
(define_expand "smaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (smax:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "")
          (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);
})
3449
;; pmaxsw: element-wise signed word maximum.
(define_insn "*smaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (smax:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "%0")
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
  "pmaxsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3459
;; Unsigned word maximum.  With SSE4.1 the UMAX pattern itself is
;; emitted.  Otherwise it is computed as (op1 -us op2) + op2, where -us
;; is the unsigned saturating subtract.
(define_expand "umaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
                   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1)
    ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
  else
    {
      rtx dst = operands[0];
      rtx rhs = operands[2];
      /* The difference may live in the destination unless that would
         overwrite operand 2, which is still needed by the add.  */
      rtx diff = rtx_equal_p (dst, rhs) ? gen_reg_rtx (V8HImode) : dst;

      emit_insn (gen_sse2_ussubv8hi3 (diff, operands[1], rhs));
      emit_insn (gen_addv8hi3 (dst, diff, rhs));
      DONE;
    }
})
3478
;; Signed maximum for V16QI and V4SI.  With SSE4.1 the SMAX pattern
;; itself is emitted.  Otherwise it is expanded through
;; ix86_expand_int_vcond as: op1 > op2 ? op1 : op2.
(define_expand "smax<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "")
        (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
                        (match_operand:SSEMODE14 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      rtx vcond_ops[6];
      bool ok;

      /* Destination, value when true, value when false.  */
      vcond_ops[0] = operands[0];
      vcond_ops[1] = operands[1];
      vcond_ops[2] = operands[2];
      /* The comparison and its two operands.  */
      vcond_ops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
      vcond_ops[4] = operands[1];
      vcond_ops[5] = operands[2];

      ok = ix86_expand_int_vcond (vcond_ops);
      gcc_assert (ok);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
})
3503
;; pmaxs{b,d}: element-wise signed maximum (SSE4.1).
(define_insn "*sse4_1_smax<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
        (smax:SSEMODE14 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
                        (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
  "pmaxs<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
3514
;; Unsigned doubleword maximum.  With SSE4.1 the UMAX pattern itself is
;; emitted.  Otherwise it is expanded through ix86_expand_int_vcond as:
;; op1 >u op2 ? op1 : op2.
(define_expand "umaxv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      rtx vcond_ops[6];
      bool ok;

      /* Destination, value when true, value when false.  */
      vcond_ops[0] = operands[0];
      vcond_ops[1] = operands[1];
      vcond_ops[2] = operands[2];
      /* The comparison and its two operands.  */
      vcond_ops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
      vcond_ops[4] = operands[1];
      vcond_ops[5] = operands[2];

      ok = ix86_expand_int_vcond (vcond_ops);
      gcc_assert (ok);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
})
3539
;; pmaxu{w,d}: element-wise unsigned maximum (SSE4.1).
(define_insn "*sse4_1_umax<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
        (umax:SSEMODE24 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
                        (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (UMAX, <MODE>mode, operands)"
  "pmaxu<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
3550
;; Unsigned byte minimum.  Operands are legitimized by
;; ix86_fixup_binary_operands_no_copy before the UMIN pattern is emitted.
(define_expand "uminv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (umin:V16QI
          (match_operand:V16QI 1 "nonimmediate_operand" "")
          (match_operand:V16QI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);
})
3557
;; pminub: element-wise unsigned byte minimum.
(define_insn "*uminv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (umin:V16QI
          (match_operand:V16QI 1 "nonimmediate_operand" "%0")
          (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
  "pminub\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3567
;; Signed word minimum.  Operands are legitimized by
;; ix86_fixup_binary_operands_no_copy before the SMIN pattern is emitted.
(define_expand "sminv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (smin:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "")
          (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);
})
3574
;; pminsw: element-wise signed word minimum.
(define_insn "*sminv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (smin:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "%0")
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
  "pminsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3584
;; Signed minimum for V16QI and V4SI.  With SSE4.1 the SMIN pattern
;; itself is emitted.  Otherwise it is expanded through
;; ix86_expand_int_vcond as: op1 > op2 ? op2 : op1.
(define_expand "smin<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "")
        (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
                        (match_operand:SSEMODE14 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      rtx vcond_ops[6];
      bool ok;

      /* Destination, value when true, value when false.  Note the two
         values are swapped relative to the comparison operands.  */
      vcond_ops[0] = operands[0];
      vcond_ops[1] = operands[2];
      vcond_ops[2] = operands[1];
      /* The comparison and its two operands.  */
      vcond_ops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
      vcond_ops[4] = operands[1];
      vcond_ops[5] = operands[2];

      ok = ix86_expand_int_vcond (vcond_ops);
      gcc_assert (ok);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
})
3609
;; pmins{b,d}: element-wise signed minimum (SSE4.1).
(define_insn "*sse4_1_smin<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
        (smin:SSEMODE14 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
                        (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
  "pmins<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
3620
;; Unsigned minimum for V8HI and V4SI.  With SSE4.1 the UMIN pattern
;; itself is emitted.  Otherwise it is expanded through
;; ix86_expand_int_vcond as: op1 >u op2 ? op2 : op1.
(define_expand "umin<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "")
        (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
                        (match_operand:SSEMODE24 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      rtx vcond_ops[6];
      bool ok;

      /* Destination, value when true, value when false.  Note the two
         values are swapped relative to the comparison operands.  */
      vcond_ops[0] = operands[0];
      vcond_ops[1] = operands[2];
      vcond_ops[2] = operands[1];
      /* The comparison and its two operands.  */
      vcond_ops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
      vcond_ops[4] = operands[1];
      vcond_ops[5] = operands[2];

      ok = ix86_expand_int_vcond (vcond_ops);
      gcc_assert (ok);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
})
3645
;; pminu{w,d}: element-wise unsigned minimum (SSE4.1).
(define_insn "*sse4_1_umin<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
        (umin:SSEMODE24 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
                        (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (UMIN, <MODE>mode, operands)"
  "pminu<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
3656
3657 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3658 ;;
3659 ;; Parallel integral comparisons
3660 ;;
3661 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3662
;; pcmpeq{b,w,d}: element-wise equality compare.  Commutative, so
;; operand 1 carries the "%" marker and is tied to the destination.
(define_insn "sse2_eq<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (eq:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
                       (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
  "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3673
;; pcmpeqq: element-wise equality compare of quadwords (SSE4.1).
(define_insn "sse4_1_eqv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (eq:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
                 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
  "pcmpeqq\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
3684
;; pcmpgt{b,w,d}: element-wise greater-than compare.  Not commutative,
;; so operand 1 is a register tied to the destination.
(define_insn "sse2_gt<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (gt:SSEMODE124 (match_operand:SSEMODE124 1 "register_operand" "0")
                       (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3695
;; V2DI element-wise greater-than comparison (gt), emitted as pcmpgtq.
;; Gated on TARGET_SSE4_2.  Not commutative: operand 1 is tied to the
;; destination ("0"), so it uses register_operand like sse2_gt<mode>3;
;; operand 2 may be a register or memory.
;; pcmpgtq uses a three-byte opcode (0F 38 ..), hence prefix_extra is set
;; as it is for pcmpeqq in sse4_1_eqv2di3, keeping the length estimate right.
3696 (define_insn "sse4_2_gtv2di3"
3697 [(set (match_operand:V2DI 0 "register_operand" "=x")
3698 (gt:V2DI
3699 (match_operand:V2DI 1 "register_operand" "0")
3700 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3701 "TARGET_SSE4_2"
3702 "pcmpgtq\t{%2, %0|%0, %2}"
3703 [(set_attr "type" "ssecmp")
(set_attr "prefix_extra" "1")
3704 (set_attr "mode" "TI")])
3705
;; Expander for a vector conditional select over the SSEMODEI modes:
;; operand 0 = (operand 3 applied to operands 4 and 5) ? operand 1 : operand 2.
;; All work is delegated to ix86_expand_int_vcond; the expansion FAILs
;; when that helper returns false.
3706 (define_expand "vcond<mode>"
3707 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3708 (if_then_else:SSEMODEI
3709 (match_operator 3 ""
3710 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3711 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3712 (match_operand:SSEMODEI 1 "general_operand" "")
3713 (match_operand:SSEMODEI 2 "general_operand" "")))]
3714 "TARGET_SSE2"
3715 {
3716 if (ix86_expand_int_vcond (operands))
3717 DONE;
3718 else
3719 FAIL;
3720 })
3721
;; Expander with the same template and body as vcond<mode>: a vector
;; conditional select over the SSEMODEI modes, delegated to
;; ix86_expand_int_vcond, FAILing when the helper returns false.
;; NOTE(review): presumably the unsigned-comparison variant, going by the
;; name -- nothing in this body distinguishes it from vcond<mode>.
3722 (define_expand "vcondu<mode>"
3723 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3724 (if_then_else:SSEMODEI
3725 (match_operator 3 ""
3726 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3727 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3728 (match_operand:SSEMODEI 1 "general_operand" "")
3729 (match_operand:SSEMODEI 2 "general_operand" "")))]
3730 "TARGET_SSE2"
3731 {
3732 if (ix86_expand_int_vcond (operands))
3733 DONE;
3734 else
3735 FAIL;
3736 })
3737
3738 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3739 ;;
3740 ;; Parallel bitwise logical operations
3741 ;;
3742 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3743
;; One's complement for the SSEMODEI modes, expressed as XOR of operand 1
;; with an all-ones vector.  The preparation code builds a CONST_VECTOR
;; whose every element is constm1_rtx, forces it into a register and
;; installs it as operand 2 (the match_dup in the template).
3744 (define_expand "one_cmpl<mode>2"
3745 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3746 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3747 (match_dup 2)))]
3748 "TARGET_SSE2"
3749 {
3750 int i, n = GET_MODE_NUNITS (<MODE>mode);
3751 rtvec v = rtvec_alloc (n);
3752
3753 for (i = 0; i < n; ++i)
3754 RTVEC_ELT (v, i) = constm1_rtx;
3755
3756 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
3757 })
3758
;; Named expander for bitwise AND over the SSEMODEI modes.  The operands
;; are first adjusted by ix86_fixup_binary_operands_no_copy; the template
;; is then matched by the *and<mode>3 insn.
3759 (define_expand "and<mode>3"
3760 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3761 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3762 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3763 "TARGET_SSE2"
3764 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
3765
;; Bitwise AND over the SSEMODEI modes, emitted as pand regardless of the
;; element size.  Commutative ("%0"); operand 1 is tied to the destination.
3766 (define_insn "*and<mode>3"
3767 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3768 (and:SSEMODEI
3769 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3770 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3771 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3772 "pand\t{%2, %0|%0, %2}"
3773 [(set_attr "type" "sselog")
3774 (set_attr "prefix_data16" "1")
3775 (set_attr "mode" "TI")])
3776
;; (NOT operand 1) AND operand 2 over the SSEMODEI modes, emitted as pandn.
;; Note that the complement applies to operand 1 only, which must be a
;; register tied to the destination; the pattern is not commutative.
3777 (define_insn "sse2_nand<mode>3"
3778 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3779 (and:SSEMODEI
3780 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3781 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3782 "TARGET_SSE2"
3783 "pandn\t{%2, %0|%0, %2}"
3784 [(set_attr "type" "sselog")
3785 (set_attr "prefix_data16" "1")
3786 (set_attr "mode" "TI")])
3787
;; Named expander for bitwise AND on TFmode values, enabled for
;; TARGET_64BIT.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before matching *andtf3.
3788 (define_expand "andtf3"
3789 [(set (match_operand:TF 0 "register_operand" "")
3790 (and:TF (match_operand:TF 1 "nonimmediate_operand" "")
3791 (match_operand:TF 2 "nonimmediate_operand" "")))]
3792 "TARGET_64BIT"
3793 "ix86_fixup_binary_operands_no_copy (AND, TFmode, operands);")
3794
;; Bitwise AND on TFmode values held in SSE registers, emitted as pand.
;; Commutative; operand 1 is tied to the destination.
3795 (define_insn "*andtf3"
3796 [(set (match_operand:TF 0 "register_operand" "=x")
3797 (and:TF
3798 (match_operand:TF 1 "nonimmediate_operand" "%0")
3799 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3800 "TARGET_64BIT && ix86_binary_operator_ok (AND, TFmode, operands)"
3801 "pand\t{%2, %0|%0, %2}"
3802 [(set_attr "type" "sselog")
3803 (set_attr "prefix_data16" "1")
3804 (set_attr "mode" "TI")])
3805
;; (NOT operand 1) AND operand 2 on TFmode values, emitted as pandn.
;; Operand 1 must be a register tied to the destination.
3806 (define_insn "*nandtf3"
3807 [(set (match_operand:TF 0 "register_operand" "=x")
3808 (and:TF
3809 (not:TF (match_operand:TF 1 "register_operand" "0"))
3810 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3811 "TARGET_64BIT"
3812 "pandn\t{%2, %0|%0, %2}"
3813 [(set_attr "type" "sselog")
3814 (set_attr "prefix_data16" "1")
3815 (set_attr "mode" "TI")])
3816
;; Named expander for bitwise inclusive OR over the SSEMODEI modes.
;; Operands are adjusted by ix86_fixup_binary_operands_no_copy before
;; matching *ior<mode>3.
3817 (define_expand "ior<mode>3"
3818 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3819 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3820 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3821 "TARGET_SSE2"
3822 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
3823
;; Bitwise inclusive OR over the SSEMODEI modes, emitted as por.
;; Commutative; operand 1 is tied to the destination.
3824 (define_insn "*ior<mode>3"
3825 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3826 (ior:SSEMODEI
3827 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3828 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3829 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3830 "por\t{%2, %0|%0, %2}"
3831 [(set_attr "type" "sselog")
3832 (set_attr "prefix_data16" "1")
3833 (set_attr "mode" "TI")])
3834
;; Named expander for bitwise inclusive OR on TFmode values, enabled for
;; TARGET_64BIT.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before matching *iortf3.
3835 (define_expand "iortf3"
3836 [(set (match_operand:TF 0 "register_operand" "")
3837 (ior:TF (match_operand:TF 1 "nonimmediate_operand" "")
3838 (match_operand:TF 2 "nonimmediate_operand" "")))]
3839 "TARGET_64BIT"
3840 "ix86_fixup_binary_operands_no_copy (IOR, TFmode, operands);")
3841
;; Bitwise inclusive OR on TFmode values held in SSE registers, emitted
;; as por.  Commutative; operand 1 is tied to the destination.
3842 (define_insn "*iortf3"
3843 [(set (match_operand:TF 0 "register_operand" "=x")
3844 (ior:TF
3845 (match_operand:TF 1 "nonimmediate_operand" "%0")
3846 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3847 "TARGET_64BIT && ix86_binary_operator_ok (IOR, TFmode, operands)"
3848 "por\t{%2, %0|%0, %2}"
3849 [(set_attr "type" "sselog")
3850 (set_attr "prefix_data16" "1")
3851 (set_attr "mode" "TI")])
3852
;; Named expander for bitwise exclusive OR over the SSEMODEI modes.
;; Operands are adjusted by ix86_fixup_binary_operands_no_copy before
;; matching *xor<mode>3.
3853 (define_expand "xor<mode>3"
3854 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3855 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3856 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3857 "TARGET_SSE2"
3858 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
3859
;; Bitwise exclusive OR over the SSEMODEI modes, emitted as pxor.
;; Commutative; operand 1 is tied to the destination.
3860 (define_insn "*xor<mode>3"
3861 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3862 (xor:SSEMODEI
3863 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3864 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3865 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3866 "pxor\t{%2, %0|%0, %2}"
3867 [(set_attr "type" "sselog")
3868 (set_attr "prefix_data16" "1")
3869 (set_attr "mode" "TI")])
3870
;; Named expander for bitwise exclusive OR on TFmode values, enabled for
;; TARGET_64BIT.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before matching *xortf3.
3871 (define_expand "xortf3"
3872 [(set (match_operand:TF 0 "register_operand" "")
3873 (xor:TF (match_operand:TF 1 "nonimmediate_operand" "")
3874 (match_operand:TF 2 "nonimmediate_operand" "")))]
3875 "TARGET_64BIT"
3876 "ix86_fixup_binary_operands_no_copy (XOR, TFmode, operands);")
3877
;; Bitwise exclusive OR on TFmode values held in SSE registers, emitted
;; as pxor.  Commutative; operand 1 is tied to the destination.
3878 (define_insn "*xortf3"
3879 [(set (match_operand:TF 0 "register_operand" "=x")
3880 (xor:TF
3881 (match_operand:TF 1 "nonimmediate_operand" "%0")
3882 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3883 "TARGET_64BIT && ix86_binary_operator_ok (XOR, TFmode, operands)"
3884 "pxor\t{%2, %0|%0, %2}"
3885 [(set_attr "type" "sselog")
3886 (set_attr "prefix_data16" "1")
3887 (set_attr "mode" "TI")])
3888
3889 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3890 ;;
3891 ;; Parallel integral element swizzling
3892 ;;
3893 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3894
3895 ;; Reduce:
3896 ;; op1 = abcdefghijklmnop
3897 ;; op2 = qrstuvwxyz012345
3898 ;; h1 = aqbrcsdteufvgwhx
3899 ;; l1 = iyjzk0l1m2n3o4p5
3900 ;; h2 = aiqybjrzcks0dlt1
3901 ;; l2 = emu2fnv3gow4hpx5
3902 ;; h3 = aeimquy2bfjnrvz3
3903 ;; l3 = cgkosw04dhlptx15
3904 ;; result = bdfhjlnprtvxz135
;; Packs two V8HI inputs (operands 1 and 2) into one V16QI result
;; (operand 0).  Both inputs are reinterpreted as V16QI with gen_lowpart,
;; then three rounds of high/low byte interleaves are applied to six
;; temporaries and a final low interleave writes operand 0 -- seven
;; interleave insns in total.  The template has no RTL of its own; the
;; expansion always ends in DONE.
3905 (define_expand "vec_pack_trunc_v8hi"
3906 [(match_operand:V16QI 0 "register_operand" "")
3907 (match_operand:V8HI 1 "register_operand" "")
3908 (match_operand:V8HI 2 "register_operand" "")]
3909 "TARGET_SSE2"
3910 {
3911 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3912
3913 op1 = gen_lowpart (V16QImode, operands[1]);
3914 op2 = gen_lowpart (V16QImode, operands[2]);
3915 h1 = gen_reg_rtx (V16QImode);
3916 l1 = gen_reg_rtx (V16QImode);
3917 h2 = gen_reg_rtx (V16QImode);
3918 l2 = gen_reg_rtx (V16QImode);
3919 h3 = gen_reg_rtx (V16QImode);
3920 l3 = gen_reg_rtx (V16QImode);
3921
3922 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3923 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3924 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3925 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3926 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3927 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
3928 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
3929 DONE;
3930 })
3931
3932 ;; Reduce:
3933 ;; op1 = abcdefgh
3934 ;; op2 = ijklmnop
3935 ;; h1 = aibjckdl
3936 ;; l1 = emfngohp
3937 ;; h2 = aeimbfjn
3938 ;; l2 = cgkodhlp
3939 ;; result = bdfhjlnp
;; Packs two V4SI inputs (operands 1 and 2) into one V8HI result
;; (operand 0).  Both inputs are reinterpreted as V8HI with gen_lowpart,
;; then two rounds of high/low halfword interleaves on four temporaries
;; are followed by a final low interleave into operand 0 -- five
;; interleave insns in total.  The expansion always ends in DONE.
3940 (define_expand "vec_pack_trunc_v4si"
3941 [(match_operand:V8HI 0 "register_operand" "")
3942 (match_operand:V4SI 1 "register_operand" "")
3943 (match_operand:V4SI 2 "register_operand" "")]
3944 "TARGET_SSE2"
3945 {
3946 rtx op1, op2, h1, l1, h2, l2;
3947
3948 op1 = gen_lowpart (V8HImode, operands[1]);
3949 op2 = gen_lowpart (V8HImode, operands[2]);
3950 h1 = gen_reg_rtx (V8HImode);
3951 l1 = gen_reg_rtx (V8HImode);
3952 h2 = gen_reg_rtx (V8HImode);
3953 l2 = gen_reg_rtx (V8HImode);
3954
3955 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
3956 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
3957 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
3958 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
3959 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
3960 DONE;
3961 })
3962
3963 ;; Reduce:
3964 ;; op1 = abcd
3965 ;; op2 = efgh
3966 ;; h1 = aebf
3967 ;; l1 = cgdh
3968 ;; result = bdfh
;; Packs two V2DI inputs (operands 1 and 2) into one V4SI result
;; (operand 0).  Both inputs are reinterpreted as V4SI with gen_lowpart;
;; one high and one low doubleword interleave fill two temporaries and a
;; final low interleave of those writes operand 0.  The expansion always
;; ends in DONE.
3969 (define_expand "vec_pack_trunc_v2di"
3970 [(match_operand:V4SI 0 "register_operand" "")
3971 (match_operand:V2DI 1 "register_operand" "")
3972 (match_operand:V2DI 2 "register_operand" "")]
3973 "TARGET_SSE2"
3974 {
3975 rtx op1, op2, h1, l1;
3976
3977 op1 = gen_lowpart (V4SImode, operands[1]);
3978 op2 = gen_lowpart (V4SImode, operands[2]);
3979 h1 = gen_reg_rtx (V4SImode);
3980 l1 = gen_reg_rtx (V4SImode);
3981
3982 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
3983 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
3984 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
3985 DONE;
3986 })
3987
;; Interleaves elements 8..15 of operand 1 with elements 8..15 of
;; operand 2 (indices 24..31 of the V32QI concatenation).  The
;; preparation code emits the sse2_punpckhbw insn, which has the same
;; selection pattern, and finishes with DONE.
3988 (define_expand "vec_interleave_highv16qi"
3989 [(set (match_operand:V16QI 0 "register_operand" "=x")
3990 (vec_select:V16QI
3991 (vec_concat:V32QI
3992 (match_operand:V16QI 1 "register_operand" "0")
3993 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3994 (parallel [(const_int 8) (const_int 24)
3995 (const_int 9) (const_int 25)
3996 (const_int 10) (const_int 26)
3997 (const_int 11) (const_int 27)
3998 (const_int 12) (const_int 28)
3999 (const_int 13) (const_int 29)
4000 (const_int 14) (const_int 30)
4001 (const_int 15) (const_int 31)])))]
4002 "TARGET_SSE2"
4003 {
4004 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
4005 DONE;
4006 })
4007
;; Interleaves elements 0..7 of operand 1 with elements 0..7 of
;; operand 2 (indices 16..23 of the V32QI concatenation).  The
;; preparation code emits the sse2_punpcklbw insn, which has the same
;; selection pattern, and finishes with DONE.
4008 (define_expand "vec_interleave_lowv16qi"
4009 [(set (match_operand:V16QI 0 "register_operand" "=x")
4010 (vec_select:V16QI
4011 (vec_concat:V32QI
4012 (match_operand:V16QI 1 "register_operand" "0")
4013 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4014 (parallel [(const_int 0) (const_int 16)
4015 (const_int 1) (const_int 17)
4016 (const_int 2) (const_int 18)
4017 (const_int 3) (const_int 19)
4018 (const_int 4) (const_int 20)
4019 (const_int 5) (const_int 21)
4020 (const_int 6) (const_int 22)
4021 (const_int 7) (const_int 23)])))]
4022 "TARGET_SSE2"
4023 {
4024 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
4025 DONE;
4026 })
4027
;; Interleaves elements 4..7 of operand 1 with elements 4..7 of
;; operand 2 (indices 12..15 of the V16HI concatenation).  The
;; preparation code emits sse2_punpckhwd and finishes with DONE.
4028 (define_expand "vec_interleave_highv8hi"
4029 [(set (match_operand:V8HI 0 "register_operand" "=x")
4030 (vec_select:V8HI
4031 (vec_concat:V16HI
4032 (match_operand:V8HI 1 "register_operand" "0")
4033 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4034 (parallel [(const_int 4) (const_int 12)
4035 (const_int 5) (const_int 13)
4036 (const_int 6) (const_int 14)
4037 (const_int 7) (const_int 15)])))]
4038 "TARGET_SSE2"
4039 {
4040 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
4041 DONE;
4042 })
4043
;; Interleaves elements 0..3 of operand 1 with elements 0..3 of
;; operand 2 (indices 8..11 of the V16HI concatenation).  The
;; preparation code emits sse2_punpcklwd and finishes with DONE.
4044 (define_expand "vec_interleave_lowv8hi"
4045 [(set (match_operand:V8HI 0 "register_operand" "=x")
4046 (vec_select:V8HI
4047 (vec_concat:V16HI
4048 (match_operand:V8HI 1 "register_operand" "0")
4049 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4050 (parallel [(const_int 0) (const_int 8)
4051 (const_int 1) (const_int 9)
4052 (const_int 2) (const_int 10)
4053 (const_int 3) (const_int 11)])))]
4054 "TARGET_SSE2"
4055 {
4056 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
4057 DONE;
4058 })
4059
;; Interleaves elements 2..3 of operand 1 with elements 2..3 of
;; operand 2 (indices 6..7 of the V8SI concatenation).  The
;; preparation code emits sse2_punpckhdq and finishes with DONE.
4060 (define_expand "vec_interleave_highv4si"
4061 [(set (match_operand:V4SI 0 "register_operand" "=x")
4062 (vec_select:V4SI
4063 (vec_concat:V8SI
4064 (match_operand:V4SI 1 "register_operand" "0")
4065 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4066 (parallel [(const_int 2) (const_int 6)
4067 (const_int 3) (const_int 7)])))]
4068 "TARGET_SSE2"
4069 {
4070 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
4071 DONE;
4072 })
4073
;; Interleaves elements 0..1 of operand 1 with elements 0..1 of
;; operand 2 (indices 4..5 of the V8SI concatenation).  The
;; preparation code emits sse2_punpckldq and finishes with DONE.
4074 (define_expand "vec_interleave_lowv4si"
4075 [(set (match_operand:V4SI 0 "register_operand" "=x")
4076 (vec_select:V4SI
4077 (vec_concat:V8SI
4078 (match_operand:V4SI 1 "register_operand" "0")
4079 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4080 (parallel [(const_int 0) (const_int 4)
4081 (const_int 1) (const_int 5)])))]
4082 "TARGET_SSE2"
4083 {
4084 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
4085 DONE;
4086 })
4087
;; Selects element 1 of operand 1 followed by element 1 of operand 2
;; (index 3 of the V4DI concatenation).  The preparation code emits
;; sse2_punpckhqdq and finishes with DONE.
4088 (define_expand "vec_interleave_highv2di"
4089 [(set (match_operand:V2DI 0 "register_operand" "=x")
4090 (vec_select:V2DI
4091 (vec_concat:V4DI
4092 (match_operand:V2DI 1 "register_operand" "0")
4093 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4094 (parallel [(const_int 1)
4095 (const_int 3)])))]
4096 "TARGET_SSE2"
4097 {
4098 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
4099 DONE;
4100 })
4101
;; Selects element 0 of operand 1 followed by element 0 of operand 2
;; (index 2 of the V4DI concatenation).  The preparation code emits
;; sse2_punpcklqdq and finishes with DONE.
4102 (define_expand "vec_interleave_lowv2di"
4103 [(set (match_operand:V2DI 0 "register_operand" "=x")
4104 (vec_select:V2DI
4105 (vec_concat:V4DI
4106 (match_operand:V2DI 1 "register_operand" "0")
4107 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4108 (parallel [(const_int 0)
4109 (const_int 2)])))]
4110 "TARGET_SSE2"
4111 {
4112 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
4113 DONE;
4114 })
4115
;; Signed-saturating truncation (ss_truncate) of two V8HI operands to
;; V8QI halves, concatenated into a V16QI result; emitted as packsswb.
;; Operand 1 supplies the first half and is tied to the destination.
4116 (define_insn "sse2_packsswb"
4117 [(set (match_operand:V16QI 0 "register_operand" "=x")
4118 (vec_concat:V16QI
4119 (ss_truncate:V8QI
4120 (match_operand:V8HI 1 "register_operand" "0"))
4121 (ss_truncate:V8QI
4122 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4123 "TARGET_SSE2"
4124 "packsswb\t{%2, %0|%0, %2}"
4125 [(set_attr "type" "sselog")
4126 (set_attr "prefix_data16" "1")
4127 (set_attr "mode" "TI")])
4128
;; Signed-saturating truncation (ss_truncate) of two V4SI operands to
;; V4HI halves, concatenated into a V8HI result; emitted as packssdw.
;; Operand 1 supplies the first half and is tied to the destination.
4129 (define_insn "sse2_packssdw"
4130 [(set (match_operand:V8HI 0 "register_operand" "=x")
4131 (vec_concat:V8HI
4132 (ss_truncate:V4HI
4133 (match_operand:V4SI 1 "register_operand" "0"))
4134 (ss_truncate:V4HI
4135 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4136 "TARGET_SSE2"
4137 "packssdw\t{%2, %0|%0, %2}"
4138 [(set_attr "type" "sselog")
4139 (set_attr "prefix_data16" "1")
4140 (set_attr "mode" "TI")])
4141
;; Unsigned-saturating truncation (us_truncate) of two V8HI operands to
;; V8QI halves, concatenated into a V16QI result; emitted as packuswb.
;; Operand 1 supplies the first half and is tied to the destination.
4142 (define_insn "sse2_packuswb"
4143 [(set (match_operand:V16QI 0 "register_operand" "=x")
4144 (vec_concat:V16QI
4145 (us_truncate:V8QI
4146 (match_operand:V8HI 1 "register_operand" "0"))
4147 (us_truncate:V8QI
4148 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4149 "TARGET_SSE2"
4150 "packuswb\t{%2, %0|%0, %2}"
4151 [(set_attr "type" "sselog")
4152 (set_attr "prefix_data16" "1")
4153 (set_attr "mode" "TI")])
4154
;; punpckhbw: interleaves byte elements 8..15 of operand 1 with byte
;; elements 8..15 of operand 2 (indices 24..31 of the concatenation).
;; Operand 1 is tied to the destination; operand 2 may be memory.
4155 (define_insn "sse2_punpckhbw"
4156 [(set (match_operand:V16QI 0 "register_operand" "=x")
4157 (vec_select:V16QI
4158 (vec_concat:V32QI
4159 (match_operand:V16QI 1 "register_operand" "0")
4160 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4161 (parallel [(const_int 8) (const_int 24)
4162 (const_int 9) (const_int 25)
4163 (const_int 10) (const_int 26)
4164 (const_int 11) (const_int 27)
4165 (const_int 12) (const_int 28)
4166 (const_int 13) (const_int 29)
4167 (const_int 14) (const_int 30)
4168 (const_int 15) (const_int 31)])))]
4169 "TARGET_SSE2"
4170 "punpckhbw\t{%2, %0|%0, %2}"
4171 [(set_attr "type" "sselog")
4172 (set_attr "prefix_data16" "1")
4173 (set_attr "mode" "TI")])
4174
;; punpcklbw: interleaves byte elements 0..7 of operand 1 with byte
;; elements 0..7 of operand 2 (indices 16..23 of the concatenation).
;; Operand 1 is tied to the destination; operand 2 may be memory.
4175 (define_insn "sse2_punpcklbw"
4176 [(set (match_operand:V16QI 0 "register_operand" "=x")
4177 (vec_select:V16QI
4178 (vec_concat:V32QI
4179 (match_operand:V16QI 1 "register_operand" "0")
4180 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4181 (parallel [(const_int 0) (const_int 16)
4182 (const_int 1) (const_int 17)
4183 (const_int 2) (const_int 18)
4184 (const_int 3) (const_int 19)
4185 (const_int 4) (const_int 20)
4186 (const_int 5) (const_int 21)
4187 (const_int 6) (const_int 22)
4188 (const_int 7) (const_int 23)])))]
4189 "TARGET_SSE2"
4190 "punpcklbw\t{%2, %0|%0, %2}"
4191 [(set_attr "type" "sselog")
4192 (set_attr "prefix_data16" "1")
4193 (set_attr "mode" "TI")])
4194
;; punpckhwd: interleaves halfword elements 4..7 of operand 1 with
;; halfword elements 4..7 of operand 2 (indices 12..15 of the
;; concatenation).  Operand 1 is tied to the destination.
4195 (define_insn "sse2_punpckhwd"
4196 [(set (match_operand:V8HI 0 "register_operand" "=x")
4197 (vec_select:V8HI
4198 (vec_concat:V16HI
4199 (match_operand:V8HI 1 "register_operand" "0")
4200 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4201 (parallel [(const_int 4) (const_int 12)
4202 (const_int 5) (const_int 13)
4203 (const_int 6) (const_int 14)
4204 (const_int 7) (const_int 15)])))]
4205 "TARGET_SSE2"
4206 "punpckhwd\t{%2, %0|%0, %2}"
4207 [(set_attr "type" "sselog")
4208 (set_attr "prefix_data16" "1")
4209 (set_attr "mode" "TI")])
4210
;; punpcklwd: interleaves halfword elements 0..3 of operand 1 with
;; halfword elements 0..3 of operand 2 (indices 8..11 of the
;; concatenation).  Operand 1 is tied to the destination.
4211 (define_insn "sse2_punpcklwd"
4212 [(set (match_operand:V8HI 0 "register_operand" "=x")
4213 (vec_select:V8HI
4214 (vec_concat:V16HI
4215 (match_operand:V8HI 1 "register_operand" "0")
4216 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4217 (parallel [(const_int 0) (const_int 8)
4218 (const_int 1) (const_int 9)
4219 (const_int 2) (const_int 10)
4220 (const_int 3) (const_int 11)])))]
4221 "TARGET_SSE2"
4222 "punpcklwd\t{%2, %0|%0, %2}"
4223 [(set_attr "type" "sselog")
4224 (set_attr "prefix_data16" "1")
4225 (set_attr "mode" "TI")])
4226
;; punpckhdq: interleaves doubleword elements 2..3 of operand 1 with
;; doubleword elements 2..3 of operand 2 (indices 6..7 of the
;; concatenation).  Operand 1 is tied to the destination.
4227 (define_insn "sse2_punpckhdq"
4228 [(set (match_operand:V4SI 0 "register_operand" "=x")
4229 (vec_select:V4SI
4230 (vec_concat:V8SI
4231 (match_operand:V4SI 1 "register_operand" "0")
4232 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4233 (parallel [(const_int 2) (const_int 6)
4234 (const_int 3) (const_int 7)])))]
4235 "TARGET_SSE2"
4236 "punpckhdq\t{%2, %0|%0, %2}"
4237 [(set_attr "type" "sselog")
4238 (set_attr "prefix_data16" "1")
4239 (set_attr "mode" "TI")])
4240
;; punpckldq: interleaves doubleword elements 0..1 of operand 1 with
;; doubleword elements 0..1 of operand 2 (indices 4..5 of the
;; concatenation).  Operand 1 is tied to the destination.
4241 (define_insn "sse2_punpckldq"
4242 [(set (match_operand:V4SI 0 "register_operand" "=x")
4243 (vec_select:V4SI
4244 (vec_concat:V8SI
4245 (match_operand:V4SI 1 "register_operand" "0")
4246 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4247 (parallel [(const_int 0) (const_int 4)
4248 (const_int 1) (const_int 5)])))]
4249 "TARGET_SSE2"
4250 "punpckldq\t{%2, %0|%0, %2}"
4251 [(set_attr "type" "sselog")
4252 (set_attr "prefix_data16" "1")
4253 (set_attr "mode" "TI")])
4254
;; punpckhqdq: result is element 1 of operand 1 followed by element 1 of
;; operand 2 (index 3 of the concatenation).  Operand 1 is tied to the
;; destination.
4255 (define_insn "sse2_punpckhqdq"
4256 [(set (match_operand:V2DI 0 "register_operand" "=x")
4257 (vec_select:V2DI
4258 (vec_concat:V4DI
4259 (match_operand:V2DI 1 "register_operand" "0")
4260 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4261 (parallel [(const_int 1)
4262 (const_int 3)])))]
4263 "TARGET_SSE2"
4264 "punpckhqdq\t{%2, %0|%0, %2}"
4265 [(set_attr "type" "sselog")
4266 (set_attr "prefix_data16" "1")
4267 (set_attr "mode" "TI")])
4268
;; punpcklqdq: result is element 0 of operand 1 followed by element 0 of
;; operand 2 (index 2 of the concatenation).  Operand 1 is tied to the
;; destination.
4269 (define_insn "sse2_punpcklqdq"
4270 [(set (match_operand:V2DI 0 "register_operand" "=x")
4271 (vec_select:V2DI
4272 (vec_concat:V4DI
4273 (match_operand:V2DI 1 "register_operand" "0")
4274 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4275 (parallel [(const_int 0)
4276 (const_int 2)])))]
4277 "TARGET_SSE2"
4278 "punpcklqdq\t{%2, %0|%0, %2}"
4279 [(set_attr "type" "sselog")
4280 (set_attr "prefix_data16" "1")
4281 (set_attr "mode" "TI")])
4282
;; Inserts the QImode operand 2 into one element of the V16QI operand 1
;; (tied to the destination), emitted as pinsrb.  Operand 3 is matched
;; as a power-of-two vec_merge mask; the output code rewrites it with
;; exact_log2 into the element index the instruction takes.
4283 (define_insn "*sse4_1_pinsrb"
4284 [(set (match_operand:V16QI 0 "register_operand" "=x")
4285 (vec_merge:V16QI
4286 (vec_duplicate:V16QI
4287 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4288 (match_operand:V16QI 1 "register_operand" "0")
4289 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4290 "TARGET_SSE4_1"
4291 {
4292 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4293 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4294 }
4295 [(set_attr "type" "sselog")
4296 (set_attr "prefix_extra" "1")
4297 (set_attr "mode" "TI")])
4298
;; Inserts the HImode operand 2 into one element of the V8HI operand 1
;; (tied to the destination), emitted as pinsrw.  Operand 3 is matched
;; as a power-of-two vec_merge mask; the output code rewrites it with
;; exact_log2 into the element index the instruction takes.
4299 (define_insn "*sse2_pinsrw"
4300 [(set (match_operand:V8HI 0 "register_operand" "=x")
4301 (vec_merge:V8HI
4302 (vec_duplicate:V8HI
4303 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4304 (match_operand:V8HI 1 "register_operand" "0")
4305 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4306 "TARGET_SSE2"
4307 {
4308 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4309 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4310 }
4311 [(set_attr "type" "sselog")
4312 (set_attr "prefix_data16" "1")
4313 (set_attr "mode" "TI")])
4314
4315 ;; It must come before sse2_loadld since it is preferred.
;; Inserts the SImode operand 2 into one element of the V4SI operand 1
;; (tied to the destination), emitted as pinsrd.  Operand 3 is matched
;; as a power-of-two vec_merge mask; the output code rewrites it with
;; exact_log2 into the element index the instruction takes.
4316 (define_insn "*sse4_1_pinsrd"
4317 [(set (match_operand:V4SI 0 "register_operand" "=x")
4318 (vec_merge:V4SI
4319 (vec_duplicate:V4SI
4320 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4321 (match_operand:V4SI 1 "register_operand" "0")
4322 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4323 "TARGET_SSE4_1"
4324 {
4325 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4326 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4327 }
4328 [(set_attr "type" "sselog")
4329 (set_attr "prefix_extra" "1")
4330 (set_attr "mode" "TI")])
4331
;; Inserts the DImode operand 2 into one element of the V2DI operand 1
;; (tied to the destination), emitted as pinsrq.  Operand 3 is matched
;; as a power-of-two vec_merge mask; the output code rewrites it with
;; exact_log2 into the element index the instruction takes.
;; Restricted to TARGET_64BIT, like *sse4_1_pextrq: pinsrq takes a 64-bit
;; general register or memory source and is only encodable in 64-bit mode.
4332 (define_insn "*sse4_1_pinsrq"
4333 [(set (match_operand:V2DI 0 "register_operand" "=x")
4334 (vec_merge:V2DI
4335 (vec_duplicate:V2DI
4336 (match_operand:DI 2 "nonimmediate_operand" "rm"))
4337 (match_operand:V2DI 1 "register_operand" "0")
4338 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
4339 "TARGET_SSE4_1 && TARGET_64BIT"
4340 {
4341 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4342 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
4343 }
4344 [(set_attr "type" "sselog")
4345 (set_attr "prefix_extra" "1")
4346 (set_attr "mode" "TI")])
4347
;; Extracts byte element operand 2 (0..15) of the V16QI operand 1 and
;; zero-extends it into an SImode general register; emitted as pextrb.
4348 (define_insn "*sse4_1_pextrb"
4349 [(set (match_operand:SI 0 "register_operand" "=r")
4350 (zero_extend:SI
4351 (vec_select:QI
4352 (match_operand:V16QI 1 "register_operand" "x")
4353 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4354 "TARGET_SSE4_1"
4355 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4356 [(set_attr "type" "sselog")
4357 (set_attr "prefix_extra" "1")
4358 (set_attr "mode" "TI")])
4359
;; Stores byte element operand 2 (0..15) of the V16QI operand 1 directly
;; to a QImode memory destination; emitted as pextrb.  No zero_extend is
;; involved in this form.
4360 (define_insn "*sse4_1_pextrb_memory"
4361 [(set (match_operand:QI 0 "memory_operand" "=m")
4362 (vec_select:QI
4363 (match_operand:V16QI 1 "register_operand" "x")
4364 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4365 "TARGET_SSE4_1"
4366 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4367 [(set_attr "type" "sselog")
4368 (set_attr "prefix_extra" "1")
4369 (set_attr "mode" "TI")])
4370
;; Extracts halfword element operand 2 (0..7) of the V8HI operand 1 and
;; zero-extends it into an SImode general register; emitted as pextrw.
4371 (define_insn "*sse2_pextrw"
4372 [(set (match_operand:SI 0 "register_operand" "=r")
4373 (zero_extend:SI
4374 (vec_select:HI
4375 (match_operand:V8HI 1 "register_operand" "x")
4376 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4377 "TARGET_SSE2"
4378 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4379 [(set_attr "type" "sselog")
4380 (set_attr "prefix_data16" "1")
4381 (set_attr "mode" "TI")])
4382
;; Stores halfword element operand 2 (0..7) of the V8HI operand 1
;; directly to an HImode memory destination; emitted as pextrw and gated
;; on TARGET_SSE4_1 (the register form above needs only TARGET_SSE2).
4383 (define_insn "*sse4_1_pextrw_memory"
4384 [(set (match_operand:HI 0 "memory_operand" "=m")
4385 (vec_select:HI
4386 (match_operand:V8HI 1 "register_operand" "x")
4387 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4388 "TARGET_SSE4_1"
4389 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4390 [(set_attr "type" "sselog")
4391 (set_attr "prefix_extra" "1")
4392 (set_attr "mode" "TI")])
4393
;; Extracts doubleword element operand 2 (0..3) of the V4SI operand 1
;; into an SImode general register or memory; emitted as pextrd.
4394 (define_insn "*sse4_1_pextrd"
4395 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4396 (vec_select:SI
4397 (match_operand:V4SI 1 "register_operand" "x")
4398 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4399 "TARGET_SSE4_1"
4400 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4401 [(set_attr "type" "sselog")
4402 (set_attr "prefix_extra" "1")
4403 (set_attr "mode" "TI")])
4404
4405 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Extracts quadword element operand 2 (0..1) of the V2DI operand 1
;; into a DImode general register or memory; emitted as pextrq.
;; Requires both TARGET_SSE4_1 and TARGET_64BIT.
4406 (define_insn "*sse4_1_pextrq"
4407 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4408 (vec_select:DI
4409 (match_operand:V2DI 1 "register_operand" "x")
4410 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4411 "TARGET_SSE4_1 && TARGET_64BIT"
4412 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4413 [(set_attr "type" "sselog")
4414 (set_attr "prefix_extra" "1")
4415 (set_attr "mode" "TI")])
4416
;; Expander taking the pshufd immediate as a single constant (operand 2).
;; It splits the mask into four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7)
;; and emits sse2_pshufd_1 with them as separate operands.
4417 (define_expand "sse2_pshufd"
4418 [(match_operand:V4SI 0 "register_operand" "")
4419 (match_operand:V4SI 1 "nonimmediate_operand" "")
4420 (match_operand:SI 2 "const_int_operand" "")]
4421 "TARGET_SSE2"
4422 {
4423 int mask = INTVAL (operands[2]);
4424 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4425 GEN_INT ((mask >> 0) & 3),
4426 GEN_INT ((mask >> 2) & 3),
4427 GEN_INT ((mask >> 4) & 3),
4428 GEN_INT ((mask >> 6) & 3)));
4429 DONE;
4430 })
4431
;; V4SI shuffle with one selector operand (each 0..3) per result element.
;; The output code packs operands 2..5 back into a single 8-bit
;; immediate, two bits each, stores it in operands[2] and emits pshufd.
4432 (define_insn "sse2_pshufd_1"
4433 [(set (match_operand:V4SI 0 "register_operand" "=x")
4434 (vec_select:V4SI
4435 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4436 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4437 (match_operand 3 "const_0_to_3_operand" "")
4438 (match_operand 4 "const_0_to_3_operand" "")
4439 (match_operand 5 "const_0_to_3_operand" "")])))]
4440 "TARGET_SSE2"
4441 {
4442 int mask = 0;
4443 mask |= INTVAL (operands[2]) << 0;
4444 mask |= INTVAL (operands[3]) << 2;
4445 mask |= INTVAL (operands[4]) << 4;
4446 mask |= INTVAL (operands[5]) << 6;
4447 operands[2] = GEN_INT (mask);
4448
4449 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4450 }
4451 [(set_attr "type" "sselog1")
4452 (set_attr "prefix_data16" "1")
4453 (set_attr "mode" "TI")])
4454
;; Expander taking the pshuflw immediate as a single constant (operand 2).
;; It splits the mask into four 2-bit selectors and emits sse2_pshuflw_1
;; with them as separate operands.
4455 (define_expand "sse2_pshuflw"
4456 [(match_operand:V8HI 0 "register_operand" "")
4457 (match_operand:V8HI 1 "nonimmediate_operand" "")
4458 (match_operand:SI 2 "const_int_operand" "")]
4459 "TARGET_SSE2"
4460 {
4461 int mask = INTVAL (operands[2]);
4462 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4463 GEN_INT ((mask >> 0) & 3),
4464 GEN_INT ((mask >> 2) & 3),
4465 GEN_INT ((mask >> 4) & 3),
4466 GEN_INT ((mask >> 6) & 3)));
4467 DONE;
4468 })
4469
;; V8HI shuffle of the four low elements: result elements 0..3 are chosen
;; by operands 2..5 (each 0..3) while elements 4..7 are copied unchanged.
;; The output code packs the selectors into one 8-bit immediate, stores
;; it in operands[2] and emits pshuflw.
4470 (define_insn "sse2_pshuflw_1"
4471 [(set (match_operand:V8HI 0 "register_operand" "=x")
4472 (vec_select:V8HI
4473 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4474 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4475 (match_operand 3 "const_0_to_3_operand" "")
4476 (match_operand 4 "const_0_to_3_operand" "")
4477 (match_operand 5 "const_0_to_3_operand" "")
4478 (const_int 4)
4479 (const_int 5)
4480 (const_int 6)
4481 (const_int 7)])))]
4482 "TARGET_SSE2"
4483 {
4484 int mask = 0;
4485 mask |= INTVAL (operands[2]) << 0;
4486 mask |= INTVAL (operands[3]) << 2;
4487 mask |= INTVAL (operands[4]) << 4;
4488 mask |= INTVAL (operands[5]) << 6;
4489 operands[2] = GEN_INT (mask);
4490
4491 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4492 }
4493 [(set_attr "type" "sselog")
4494 (set_attr "prefix_rep" "1")
4495 (set_attr "mode" "TI")])
4496
;; Expander taking the pshufhw immediate as a single constant (operand 2).
;; It splits the mask into four 2-bit selectors and adds 4 to each, so
;; that sse2_pshufhw_1 receives absolute element indices in 4..7.
4497 (define_expand "sse2_pshufhw"
4498 [(match_operand:V8HI 0 "register_operand" "")
4499 (match_operand:V8HI 1 "nonimmediate_operand" "")
4500 (match_operand:SI 2 "const_int_operand" "")]
4501 "TARGET_SSE2"
4502 {
4503 int mask = INTVAL (operands[2]);
4504 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4505 GEN_INT (((mask >> 0) & 3) + 4),
4506 GEN_INT (((mask >> 2) & 3) + 4),
4507 GEN_INT (((mask >> 4) & 3) + 4),
4508 GEN_INT (((mask >> 6) & 3) + 4)));
4509 DONE;
4510 })
4511
;; V8HI shuffle of the four high elements: result elements 0..3 are
;; copied unchanged while elements 4..7 are chosen by operands 2..5
;; (each 4..7).  The output code subtracts 4 from every selector, packs
;; them into one 8-bit immediate in operands[2] and emits pshufhw.
4512 (define_insn "sse2_pshufhw_1"
4513 [(set (match_operand:V8HI 0 "register_operand" "=x")
4514 (vec_select:V8HI
4515 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4516 (parallel [(const_int 0)
4517 (const_int 1)
4518 (const_int 2)
4519 (const_int 3)
4520 (match_operand 2 "const_4_to_7_operand" "")
4521 (match_operand 3 "const_4_to_7_operand" "")
4522 (match_operand 4 "const_4_to_7_operand" "")
4523 (match_operand 5 "const_4_to_7_operand" "")])))]
4524 "TARGET_SSE2"
4525 {
4526 int mask = 0;
4527 mask |= (INTVAL (operands[2]) - 4) << 0;
4528 mask |= (INTVAL (operands[3]) - 4) << 2;
4529 mask |= (INTVAL (operands[4]) - 4) << 4;
4530 mask |= (INTVAL (operands[5]) - 4) << 6;
4531 operands[2] = GEN_INT (mask);
4532
4533 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4534 }
4535 [(set_attr "type" "sselog")
4536 (set_attr "prefix_rep" "1")
4537 (set_attr "mode" "TI")])
4538
;; Expander that places the SImode operand 1 in element 0 of a V4SI
;; result (vec_merge mask 1) and takes the remaining elements from
;; operand 2, which the preparation code sets to the V4SI zero vector.
;; Note the condition is TARGET_SSE despite the sse2_ name.
4539 (define_expand "sse2_loadd"
4540 [(set (match_operand:V4SI 0 "register_operand" "")
4541 (vec_merge:V4SI
4542 (vec_duplicate:V4SI
4543 (match_operand:SI 1 "nonimmediate_operand" ""))
4544 (match_dup 2)
4545 (const_int 1)))]
4546 "TARGET_SSE"
4547 "operands[2] = CONST0_RTX (V4SImode);")
4548
;; Merges the SImode operand 2 into element 0 of a V4SI value whose other
;; elements come from operand 1 (zero, or the destination itself in the
;; last alternative).  Four alternatives: the first two emit movd (source
;; in memory or a general register), the last two emit movss (source in
;; memory or an SSE register); the "mode" attribute differs per
;; alternative accordingly.
4549 (define_insn "sse2_loadld"
4550 [(set (match_operand:V4SI 0 "register_operand" "=Yt,Yi,x,x")
4551 (vec_merge:V4SI
4552 (vec_duplicate:V4SI
4553 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4554 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4555 (const_int 1)))]
4556 "TARGET_SSE"
4557 "@
4558 movd\t{%2, %0|%0, %2}
4559 movd\t{%2, %0|%0, %2}
4560 movss\t{%2, %0|%0, %2}
4561 movss\t{%2, %0|%0, %2}"
4562 [(set_attr "type" "ssemov")
4563 (set_attr "mode" "TI,TI,V4SF,SF")])
4564
;; Extracts element 0 of a V4SI register into an SImode destination.
;; The insn itself has no assembly ("#"); once reload has completed, and
;; provided TARGET_INTER_UNIT_MOVES holds or the destination is memory or
;; not a general register, it is split into a plain SImode move whose
;; source is the same hard register re-expressed in SImode.
4565 (define_insn_and_split "sse2_stored"
4566 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4567 (vec_select:SI
4568 (match_operand:V4SI 1 "register_operand" "x,Yi")
4569 (parallel [(const_int 0)])))]
4570 "TARGET_SSE"
4571 "#"
4572 "&& reload_completed
4573 && (TARGET_INTER_UNIT_MOVES
4574 || MEM_P (operands [0])
4575 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4576 [(set (match_dup 0) (match_dup 1))]
4577 {
4578 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
4579 })
4580
;; Expander for extracting element 0 of a V2DI register into a DImode
;; destination.  It has no preparation code; the generated RTL is
;; matched by the *sse2_storeq* insns that follow.
4581 (define_expand "sse_storeq"
4582 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4583 (vec_select:DI
4584 (match_operand:V2DI 1 "register_operand" "")
4585 (parallel [(const_int 0)])))]
4586 "TARGET_SSE"
4587 "")
4588
;; 64-bit variant of the element-0 V2DI extraction: in addition to
;; memory or an SSE register, the destination may be a general register
;; (second alternative).  Emits no assembly itself ("#"); it relies on
;; being split.
4589 (define_insn "*sse2_storeq_rex64"
4590 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r")
4591 (vec_select:DI
4592 (match_operand:V2DI 1 "register_operand" "x,Yi")
4593 (parallel [(const_int 0)])))]
4594 "TARGET_64BIT && TARGET_SSE"
4595 "#")
4596
;; Element-0 V2DI extraction into memory or an SSE register only.
;; Emits no assembly itself ("#"); it relies on being split.
4597 (define_insn "*sse2_storeq"
4598 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4599 (vec_select:DI
4600 (match_operand:V2DI 1 "register_operand" "x")
4601 (parallel [(const_int 0)])))]
4602 "TARGET_SSE"
4603 "#")
4604
;; Splitter for the element-0 V2DI extraction.  After reload, when
;; TARGET_INTER_UNIT_MOVES holds or the destination is memory or not a
;; general register, the vec_select is replaced by a plain DImode move
;; whose source is the same hard register re-expressed in DImode.
4605 (define_split
4606 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4607 (vec_select:DI
4608 (match_operand:V2DI 1 "register_operand" "")
4609 (parallel [(const_int 0)])))]
4610 "TARGET_SSE
4611 && reload_completed
4612 && (TARGET_INTER_UNIT_MOVES
4613 || MEM_P (operands [0])
4614 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4615 [(set (match_dup 0) (match_dup 1))]
4616 {
4617 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
4618 })
4619
;; Extracts element 1 of a V2DI value when TARGET_SSE2 is available;
;; memory-to-memory is rejected by the condition.  Three alternatives:
;;   register -> memory          : movhps
;;   register in place (tied "0"): psrldq by 8 bytes
;;   offsettable memory -> reg   : movq using the %H1 operand modifier
4620 (define_insn "*vec_extractv2di_1_sse2"
4621 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4622 (vec_select:DI
4623 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4624 (parallel [(const_int 1)])))]
4625 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4626 "@
4627 movhps\t{%1, %0|%0, %1}
4628 psrldq\t{$8, %0|%0, 8}
4629 movq\t{%H1, %0|%0, %H1}"
4630 [(set_attr "type" "ssemov,sseishft,ssemov")
4631 (set_attr "memory" "*,none,*")
4632 (set_attr "mode" "V2SF,TI,TI")])
4633
;; Extract element 1 of a V2DI vector when only SSE1 is available (the
;; condition requires !TARGET_SSE2).  Memory-to-memory is rejected.
;; Alternatives:
;;   0: register -> memory, movhps
;;   1: register -> register, movhlps
;;   2: offsettable memory -> register, movlps using the %H1 operand form
4634 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
4635 (define_insn "*vec_extractv2di_1_sse"
4636 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4637 (vec_select:DI
4638 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4639 (parallel [(const_int 1)])))]
4640 "!TARGET_SSE2 && TARGET_SSE
4641 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4642 "@
4643 movhps\t{%1, %0|%0, %1}
4644 movhlps\t{%1, %0|%0, %1}
4645 movlps\t{%H1, %0|%0, %H1}"
4646 [(set_attr "type" "ssemov")
4647 (set_attr "mode" "V2SF,V4SF,V2SF")])
4648
;; Broadcast an SImode register value to all four elements of a V4SI
;; register.  Alternative 0 uses pshufd with selector 0; alternative 1 has
;; the source tied to the destination and uses shufps with selector 0 on
;; the destination itself.
4649 (define_insn "*vec_dupv4si"
4650 [(set (match_operand:V4SI 0 "register_operand" "=Yt,x")
4651 (vec_duplicate:V4SI
4652 (match_operand:SI 1 "register_operand" " Yt,0")))]
4653 "TARGET_SSE"
4654 "@
4655 pshufd\t{$0, %1, %0|%0, %1, 0}
4656 shufps\t{$0, %0, %0|%0, %0, 0}"
4657 [(set_attr "type" "sselog1")
4658 (set_attr "mode" "TI,V4SF")])
4659
;; Broadcast a DImode register value to both elements of a V2DI register.
;; In both alternatives the source is tied to the destination, so the
;; templates name only %0: punpcklqdq for the 'Yt' alternative and movlhps
;; for the plain 'x' alternative.
4660 (define_insn "*vec_dupv2di"
4661 [(set (match_operand:V2DI 0 "register_operand" "=Yt,x")
4662 (vec_duplicate:V2DI
4663 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4664 "TARGET_SSE"
4665 "@
4666 punpcklqdq\t%0, %0
4667 movlhps\t%0, %0"
4668 [(set_attr "type" "sselog1,ssemov")
4669 (set_attr "mode" "TI,V4SF")])
4670
;; Build a V2SI from two SImode values (SSE2).  Alternatives:
;;   0: SSE, operand 1 tied to the destination, punpckldq with operand 2
;;   1: SSE, operand 2 is the constant zero ('C'), movd from reg/mem
;;   2: MMX, operand 1 tied to the destination, punpckldq with operand 2
;;   3: MMX, operand 2 is the constant zero, movd from reg/mem
4671 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4672 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4673 ;; alternatives pretty much forces the MMX alternative to be chosen.
4674 (define_insn "*sse2_concatv2si"
4675 [(set (match_operand:V2SI 0 "register_operand" "=Yt, Yt,*y,*y")
4676 (vec_concat:V2SI
4677 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
4678 (match_operand:SI 2 "reg_or_0_operand" " Yt,C ,*y, C")))]
4679 "TARGET_SSE2"
4680 "@
4681 punpckldq\t{%2, %0|%0, %2}
4682 movd\t{%1, %0|%0, %1}
4683 punpckldq\t{%2, %0|%0, %2}
4684 movd\t{%1, %0|%0, %1}"
4685 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4686 (set_attr "mode" "TI,TI,DI,DI")])
4687
;; Build a V2SI from two SImode values using SSE1 or MMX instructions.
;; Alternatives:
;;   0: SSE, operand 1 tied to the destination, unpcklps with operand 2
;;   1: SSE, operand 2 is the constant zero ('C'), movss from memory
;;   2: MMX, operand 1 tied to the destination, punpckldq with operand 2
;;   3: MMX, operand 2 is the constant zero, movd from reg/mem
4688 (define_insn "*sse1_concatv2si"
4689 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4690 (vec_concat:V2SI
4691 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4692 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4693 "TARGET_SSE"
4694 "@
4695 unpcklps\t{%2, %0|%0, %2}
4696 movss\t{%1, %0|%0, %1}
4697 punpckldq\t{%2, %0|%0, %2}
4698 movd\t{%1, %0|%0, %1}"
4699 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4700 (set_attr "mode" "V4SF,V4SF,DI,DI")])
4701
;; Build a V4SI from two V2SI halves.  Operand 1 (the low half) is always
;; tied to the destination; operand 2 supplies the high half via
;; punpcklqdq (alt 0), movlhps (alt 1, register) or movhps (alt 2, memory).
4702 (define_insn "*vec_concatv4si_1"
4703 [(set (match_operand:V4SI 0 "register_operand" "=Yt,x,x")
4704 (vec_concat:V4SI
4705 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4706 (match_operand:V2SI 2 "nonimmediate_operand" " Yt,x,m")))]
4707 "TARGET_SSE"
4708 "@
4709 punpcklqdq\t{%2, %0|%0, %2}
4710 movlhps\t{%2, %0|%0, %2}
4711 movhps\t{%2, %0|%0, %2}"
4712 [(set_attr "type" "sselog,ssemov,ssemov")
4713 (set_attr "mode" "TI,V4SF,V2SF")])
4714
;; Build a V2DI from two DImode values.  Alternatives:
;;   0: low half from memory, high half constant zero ('C'): movq
;;   1: low half from an MMX register, high half zero: movq2dq
;;   2: low half tied to destination, high half from register: punpcklqdq
;;   3: low half tied to destination, high half from register: movlhps
;;   4: low half tied to destination, high half from memory: movhps
;;   5: high half tied to destination, low half from memory: movlps
4715 (define_insn "vec_concatv2di"
4716 [(set (match_operand:V2DI 0 "register_operand" "=Yt,?Yt,Yt,x,x,x")
4717 (vec_concat:V2DI
4718 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4719 (match_operand:DI 2 "vector_move_operand" " C, C,Yt,x,m,0")))]
4720 "TARGET_SSE"
4721 "@
4722 movq\t{%1, %0|%0, %1}
4723 movq2dq\t{%1, %0|%0, %1}
4724 punpcklqdq\t{%2, %0|%0, %2}
4725 movlhps\t{%2, %0|%0, %2}
4726 movhps\t{%2, %0|%0, %2}
4727 movlps\t{%1, %0|%0, %1}"
4728 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4729 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
4730
;; Set one element of a V2DI vector: operand 0 is the vector, operand 1 the
;; new DImode value, operand 2 the constant element index.  All work is
;; delegated to ix86_expand_vector_set (first argument false; its meaning is
;; not visible here, presumably an mmx_ok flag).
4731 (define_expand "vec_setv2di"
4732 [(match_operand:V2DI 0 "register_operand" "")
4733 (match_operand:DI 1 "register_operand" "")
4734 (match_operand 2 "const_int_operand" "")]
4735 "TARGET_SSE"
4736 {
4737 ix86_expand_vector_set (false, operands[0], operands[1],
4738 INTVAL (operands[2]));
4739 DONE;
4740 })
4741
;; Extract one element of a V2DI vector: operand 0 receives the DImode
;; value, operand 1 is the vector, operand 2 the constant element index.
;; All work is delegated to ix86_expand_vector_extract (first argument
;; false; presumably an mmx_ok flag).
4742 (define_expand "vec_extractv2di"
4743 [(match_operand:DI 0 "register_operand" "")
4744 (match_operand:V2DI 1 "register_operand" "")
4745 (match_operand 2 "const_int_operand" "")]
4746 "TARGET_SSE"
4747 {
4748 ix86_expand_vector_extract (false, operands[0], operands[1],
4749 INTVAL (operands[2]));
4750 DONE;
4751 })
4752
;; Initialize a V2DI register (operand 0) from operand 1, which has no
;; predicate or mode here.  All work is delegated to
;; ix86_expand_vector_init (first argument false; presumably an mmx_ok flag).
4753 (define_expand "vec_initv2di"
4754 [(match_operand:V2DI 0 "register_operand" "")
4755 (match_operand 1 "" "")]
4756 "TARGET_SSE"
4757 {
4758 ix86_expand_vector_init (false, operands[0], operands[1]);
4759 DONE;
4760 })
4761
;; Set one element of a V4SI vector: operand 0 is the vector, operand 1 the
;; new SImode value, operand 2 the constant element index.  All work is
;; delegated to ix86_expand_vector_set (first argument false; presumably an
;; mmx_ok flag).
4762 (define_expand "vec_setv4si"
4763 [(match_operand:V4SI 0 "register_operand" "")
4764 (match_operand:SI 1 "register_operand" "")
4765 (match_operand 2 "const_int_operand" "")]
4766 "TARGET_SSE"
4767 {
4768 ix86_expand_vector_set (false, operands[0], operands[1],
4769 INTVAL (operands[2]));
4770 DONE;
4771 })
4772
;; Extract one element of a V4SI vector: operand 0 receives the SImode
;; value, operand 1 is the vector, operand 2 the constant element index.
;; All work is delegated to ix86_expand_vector_extract (first argument
;; false; presumably an mmx_ok flag).
4773 (define_expand "vec_extractv4si"
4774 [(match_operand:SI 0 "register_operand" "")
4775 (match_operand:V4SI 1 "register_operand" "")
4776 (match_operand 2 "const_int_operand" "")]
4777 "TARGET_SSE"
4778 {
4779 ix86_expand_vector_extract (false, operands[0], operands[1],
4780 INTVAL (operands[2]));
4781 DONE;
4782 })
4783
;; Initialize a V4SI register (operand 0) from operand 1, which has no
;; predicate or mode here.  All work is delegated to
;; ix86_expand_vector_init (first argument false; presumably an mmx_ok flag).
4784 (define_expand "vec_initv4si"
4785 [(match_operand:V4SI 0 "register_operand" "")
4786 (match_operand 1 "" "")]
4787 "TARGET_SSE"
4788 {
4789 ix86_expand_vector_init (false, operands[0], operands[1]);
4790 DONE;
4791 })
4792
;; Set one element of a V8HI vector: operand 0 is the vector, operand 1 the
;; new HImode value, operand 2 the constant element index.  All work is
;; delegated to ix86_expand_vector_set (first argument false; presumably an
;; mmx_ok flag).
4793 (define_expand "vec_setv8hi"
4794 [(match_operand:V8HI 0 "register_operand" "")
4795 (match_operand:HI 1 "register_operand" "")
4796 (match_operand 2 "const_int_operand" "")]
4797 "TARGET_SSE"
4798 {
4799 ix86_expand_vector_set (false, operands[0], operands[1],
4800 INTVAL (operands[2]));
4801 DONE;
4802 })
4803
;; Extract one element of a V8HI vector: operand 0 receives the HImode
;; value, operand 1 is the vector, operand 2 the constant element index.
;; All work is delegated to ix86_expand_vector_extract (first argument
;; false; presumably an mmx_ok flag).
4804 (define_expand "vec_extractv8hi"
4805 [(match_operand:HI 0 "register_operand" "")
4806 (match_operand:V8HI 1 "register_operand" "")
4807 (match_operand 2 "const_int_operand" "")]
4808 "TARGET_SSE"
4809 {
4810 ix86_expand_vector_extract (false, operands[0], operands[1],
4811 INTVAL (operands[2]));
4812 DONE;
4813 })
4814
;; Initialize a V8HI register (operand 0) from operand 1, which has no
;; predicate or mode here.  All work is delegated to
;; ix86_expand_vector_init (first argument false; presumably an mmx_ok flag).
4815 (define_expand "vec_initv8hi"
4816 [(match_operand:V8HI 0 "register_operand" "")
4817 (match_operand 1 "" "")]
4818 "TARGET_SSE"
4819 {
4820 ix86_expand_vector_init (false, operands[0], operands[1]);
4821 DONE;
4822 })
4823
;; Set one element of a V16QI vector: operand 0 is the vector, operand 1 the
;; new QImode value, operand 2 the constant element index.  All work is
;; delegated to ix86_expand_vector_set (first argument false; presumably an
;; mmx_ok flag).
4824 (define_expand "vec_setv16qi"
4825 [(match_operand:V16QI 0 "register_operand" "")
4826 (match_operand:QI 1 "register_operand" "")
4827 (match_operand 2 "const_int_operand" "")]
4828 "TARGET_SSE"
4829 {
4830 ix86_expand_vector_set (false, operands[0], operands[1],
4831 INTVAL (operands[2]));
4832 DONE;
4833 })
4834
;; Extract one element of a V16QI vector: operand 0 receives the QImode
;; value, operand 1 is the vector, operand 2 the constant element index.
;; All work is delegated to ix86_expand_vector_extract (first argument
;; false; presumably an mmx_ok flag).
4835 (define_expand "vec_extractv16qi"
4836 [(match_operand:QI 0 "register_operand" "")
4837 (match_operand:V16QI 1 "register_operand" "")
4838 (match_operand 2 "const_int_operand" "")]
4839 "TARGET_SSE"
4840 {
4841 ix86_expand_vector_extract (false, operands[0], operands[1],
4842 INTVAL (operands[2]));
4843 DONE;
4844 })
4845
;; Initialize a V16QI register (operand 0) from operand 1, which has no
;; predicate or mode here.  All work is delegated to
;; ix86_expand_vector_init (first argument false; presumably an mmx_ok flag).
4846 (define_expand "vec_initv16qi"
4847 [(match_operand:V16QI 0 "register_operand" "")
4848 (match_operand 1 "" "")]
4849 "TARGET_SSE"
4850 {
4851 ix86_expand_vector_init (false, operands[0], operands[1]);
4852 DONE;
4853 })
4854
;; Unpack V16QI (operand 1) into V8HI (operand 0); unsigned, high half
;; according to the pattern name.  Dispatches to ix86_expand_sse4_unpack
;; when TARGET_SSE4_1, otherwise to ix86_expand_sse_unpack, passing
;; (true, true) -- presumably (unsigned_p, high_p); confirm in i386.c.
4855 (define_expand "vec_unpacku_hi_v16qi"
4856 [(match_operand:V8HI 0 "register_operand" "")
4857 (match_operand:V16QI 1 "register_operand" "")]
4858 "TARGET_SSE2"
4859 {
4860 if (TARGET_SSE4_1)
4861 ix86_expand_sse4_unpack (operands, true, true);
4862 else
4863 ix86_expand_sse_unpack (operands, true, true);
4864 DONE;
4865 })
;; Unpack V16QI (operand 1) into V8HI (operand 0); signed, high half
;; according to the pattern name.  Dispatches to ix86_expand_sse4_unpack
;; when TARGET_SSE4_1, otherwise to ix86_expand_sse_unpack, passing
;; (false, true) -- presumably (unsigned_p, high_p); confirm in i386.c.
4867 (define_expand "vec_unpacks_hi_v16qi"
4868 [(match_operand:V8HI 0 "register_operand" "")
4869 (match_operand:V16QI 1 "register_operand" "")]
4870 "TARGET_SSE2"
4871 {
4872 if (TARGET_SSE4_1)
4873 ix86_expand_sse4_unpack (operands, false, true);
4874 else
4875 ix86_expand_sse_unpack (operands, false, true);
4876 DONE;
4877 })
;; Unpack V16QI (operand 1) into V8HI (operand 0); unsigned, low half
;; according to the pattern name.  Dispatches to ix86_expand_sse4_unpack
;; when TARGET_SSE4_1, otherwise to ix86_expand_sse_unpack, passing
;; (true, false) -- presumably (unsigned_p, high_p); confirm in i386.c.
4879 (define_expand "vec_unpacku_lo_v16qi"
4880 [(match_operand:V8HI 0 "register_operand" "")
4881 (match_operand:V16QI 1 "register_operand" "")]
4882 "TARGET_SSE2"
4883 {
4884 if (TARGET_SSE4_1)
4885 ix86_expand_sse4_unpack (operands, true, false);
4886 else
4887 ix86_expand_sse_unpack (operands, true, false);
4888 DONE;
4889 })
;; Unpack V16QI (operand 1) into V8HI (operand 0); signed, low half
;; according to the pattern name.  Dispatches to ix86_expand_sse4_unpack
;; when TARGET_SSE4_1, otherwise to ix86_expand_sse_unpack, passing
;; (false, false) -- presumably (unsigned_p, high_p); confirm in i386.c.
4891 (define_expand "vec_unpacks_lo_v16qi"
4892 [(match_operand:V8HI 0 "register_operand" "")
4893 (match_operand:V16QI 1 "register_operand" "")]
4894 "TARGET_SSE2"
4895 {
4896 if (TARGET_SSE4_1)
4897 ix86_expand_sse4_unpack (operands, false, false);
4898 else
4899 ix86_expand_sse_unpack (operands, false, false);
4900 DONE;
4901 })
;; Unpack V8HI (operand 1) into V4SI (operand 0); unsigned, high half
;; according to the pattern name.  Dispatches to ix86_expand_sse4_unpack
;; when TARGET_SSE4_1, otherwise to ix86_expand_sse_unpack, passing
;; (true, true) -- presumably (unsigned_p, high_p); confirm in i386.c.
4903 (define_expand "vec_unpacku_hi_v8hi"
4904 [(match_operand:V4SI 0 "register_operand" "")
4905 (match_operand:V8HI 1 "register_operand" "")]
4906 "TARGET_SSE2"
4907 {
4908 if (TARGET_SSE4_1)
4909 ix86_expand_sse4_unpack (operands, true, true);
4910 else
4911 ix86_expand_sse_unpack (operands, true, true);
4912 DONE;
4913 })
;; Unpack V8HI (operand 1) into V4SI (operand 0); signed, high half
;; according to the pattern name.  Dispatches to ix86_expand_sse4_unpack
;; when TARGET_SSE4_1, otherwise to ix86_expand_sse_unpack, passing
;; (false, true) -- presumably (unsigned_p, high_p); confirm in i386.c.
4915 (define_expand "vec_unpacks_hi_v8hi"
4916 [(match_operand:V4SI 0 "register_operand" "")
4917 (match_operand:V8HI 1 "register_operand" "")]
4918 "TARGET_SSE2"
4919 {
4920 if (TARGET_SSE4_1)
4921 ix86_expand_sse4_unpack (operands, false, true);
4922 else
4923 ix86_expand_sse_unpack (operands, false, true);
4924 DONE;
4925 })
;; Unpack V8HI (operand 1) into V4SI (operand 0); unsigned, low half
;; according to the pattern name.  Dispatches to ix86_expand_sse4_unpack
;; when TARGET_SSE4_1, otherwise to ix86_expand_sse_unpack, passing
;; (true, false) -- presumably (unsigned_p, high_p); confirm in i386.c.
4927 (define_expand "vec_unpacku_lo_v8hi"
4928 [(match_operand:V4SI 0 "register_operand" "")
4929 (match_operand:V8HI 1 "register_operand" "")]
4930 "TARGET_SSE2"
4931 {
4932 if (TARGET_SSE4_1)
4933 ix86_expand_sse4_unpack (operands, true, false);
4934 else
4935 ix86_expand_sse_unpack (operands, true, false);
4936 DONE;
4937 })
;; Unpack V8HI (operand 1) into V4SI (operand 0); signed, low half
;; according to the pattern name.  Dispatches to ix86_expand_sse4_unpack
;; when TARGET_SSE4_1, otherwise to ix86_expand_sse_unpack, passing
;; (false, false) -- presumably (unsigned_p, high_p); confirm in i386.c.
4939 (define_expand "vec_unpacks_lo_v8hi"
4940 [(match_operand:V4SI 0 "register_operand" "")
4941 (match_operand:V8HI 1 "register_operand" "")]
4942 "TARGET_SSE2"
4943 {
4944 if (TARGET_SSE4_1)
4945 ix86_expand_sse4_unpack (operands, false, false);
4946 else
4947 ix86_expand_sse_unpack (operands, false, false);
4948 DONE;
4949 })
;; Unpack V4SI (operand 1) into V2DI (operand 0); unsigned, high half
;; according to the pattern name.  Dispatches to ix86_expand_sse4_unpack
;; when TARGET_SSE4_1, otherwise to ix86_expand_sse_unpack, passing
;; (true, true) -- presumably (unsigned_p, high_p); confirm in i386.c.
4951 (define_expand "vec_unpacku_hi_v4si"
4952 [(match_operand:V2DI 0 "register_operand" "")
4953 (match_operand:V4SI 1 "register_operand" "")]
4954 "TARGET_SSE2"
4955 {
4956 if (TARGET_SSE4_1)
4957 ix86_expand_sse4_unpack (operands, true, true);
4958 else
4959 ix86_expand_sse_unpack (operands, true, true);
4960 DONE;
4961 })
;; Unpack V4SI (operand 1) into V2DI (operand 0); signed, high half
;; according to the pattern name.  Dispatches to ix86_expand_sse4_unpack
;; when TARGET_SSE4_1, otherwise to ix86_expand_sse_unpack, passing
;; (false, true) -- presumably (unsigned_p, high_p); confirm in i386.c.
4963 (define_expand "vec_unpacks_hi_v4si"
4964 [(match_operand:V2DI 0 "register_operand" "")
4965 (match_operand:V4SI 1 "register_operand" "")]
4966 "TARGET_SSE2"
4967 {
4968 if (TARGET_SSE4_1)
4969 ix86_expand_sse4_unpack (operands, false, true);
4970 else
4971 ix86_expand_sse_unpack (operands, false, true);
4972 DONE;
4973 })
;; Unpack V4SI (operand 1) into V2DI (operand 0); unsigned, low half
;; according to the pattern name.  Dispatches to ix86_expand_sse4_unpack
;; when TARGET_SSE4_1, otherwise to ix86_expand_sse_unpack, passing
;; (true, false) -- presumably (unsigned_p, high_p); confirm in i386.c.
4975 (define_expand "vec_unpacku_lo_v4si"
4976 [(match_operand:V2DI 0 "register_operand" "")
4977 (match_operand:V4SI 1 "register_operand" "")]
4978 "TARGET_SSE2"
4979 {
4980 if (TARGET_SSE4_1)
4981 ix86_expand_sse4_unpack (operands, true, false);
4982 else
4983 ix86_expand_sse_unpack (operands, true, false);
4984 DONE;
4985 })
;; Unpack V4SI (operand 1) into V2DI (operand 0); signed, low half
;; according to the pattern name.  Dispatches to ix86_expand_sse4_unpack
;; when TARGET_SSE4_1, otherwise to ix86_expand_sse_unpack, passing
;; (false, false) -- presumably (unsigned_p, high_p); confirm in i386.c.
4987 (define_expand "vec_unpacks_lo_v4si"
4988 [(match_operand:V2DI 0 "register_operand" "")
4989 (match_operand:V4SI 1 "register_operand" "")]
4990 "TARGET_SSE2"
4991 {
4992 if (TARGET_SSE4_1)
4993 ix86_expand_sse4_unpack (operands, false, false);
4994 else
4995 ix86_expand_sse_unpack (operands, false, false);
4996 DONE;
4997 })
4998
4999 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5000 ;;
5001 ;; Miscellaneous
5002 ;;
5003 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5004
;; Unsigned byte average (pavgb).  The RTL zero-extends both V16QI inputs to
;; V16HI, adds them, adds a vector of ones, shifts right by one and
;; truncates back to V16QI, i.e. (a + b + 1) >> 1 per element.  Operand 1 is
;; commutative ('%') and tied to the destination.
;; NOTE(review): the const_vector of ones is written in V16QI although the
;; enclosing plus is V16HI -- confirm this mode mismatch is intended.
5005 (define_insn "sse2_uavgv16qi3"
5006 [(set (match_operand:V16QI 0 "register_operand" "=x")
5007 (truncate:V16QI
5008 (lshiftrt:V16HI
5009 (plus:V16HI
5010 (plus:V16HI
5011 (zero_extend:V16HI
5012 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
5013 (zero_extend:V16HI
5014 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
5015 (const_vector:V16QI [(const_int 1) (const_int 1)
5016 (const_int 1) (const_int 1)
5017 (const_int 1) (const_int 1)
5018 (const_int 1) (const_int 1)
5019 (const_int 1) (const_int 1)
5020 (const_int 1) (const_int 1)
5021 (const_int 1) (const_int 1)
5022 (const_int 1) (const_int 1)]))
5023 (const_int 1))))]
5024 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
5025 "pavgb\t{%2, %0|%0, %2}"
5026 [(set_attr "type" "sseiadd")
5027 (set_attr "prefix_data16" "1")
5028 (set_attr "mode" "TI")])
5029
;; Unsigned word average (pavgw).  The RTL zero-extends both V8HI inputs to
;; V8SI, adds them, adds a vector of ones, shifts right by one and truncates
;; back to V8HI, i.e. (a + b + 1) >> 1 per element.  Operand 1 is
;; commutative ('%') and tied to the destination.
;; NOTE(review): the const_vector of ones is written in V8HI although the
;; enclosing plus is V8SI -- confirm this mode mismatch is intended.
5030 (define_insn "sse2_uavgv8hi3"
5031 [(set (match_operand:V8HI 0 "register_operand" "=x")
5032 (truncate:V8HI
5033 (lshiftrt:V8SI
5034 (plus:V8SI
5035 (plus:V8SI
5036 (zero_extend:V8SI
5037 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5038 (zero_extend:V8SI
5039 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5040 (const_vector:V8HI [(const_int 1) (const_int 1)
5041 (const_int 1) (const_int 1)
5042 (const_int 1) (const_int 1)
5043 (const_int 1) (const_int 1)]))
5044 (const_int 1))))]
5045 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5046 "pavgw\t{%2, %0|%0, %2}"
5047 [(set_attr "type" "sseiadd")
5048 (set_attr "prefix_data16" "1")
5049 (set_attr "mode" "TI")])
5050
;; psadbw on two V16QI inputs producing a V2DI result, modelled as an opaque
;; UNSPEC_PSADBW rather than explicit RTL arithmetic.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.
5051 ;; The correct representation for this is absolutely enormous, and
5052 ;; surely not generally useful.
5053 (define_insn "sse2_psadbw"
5054 [(set (match_operand:V2DI 0 "register_operand" "=x")
5055 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5056 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5057 UNSPEC_PSADBW))]
5058 "TARGET_SSE2"
5059 "psadbw\t{%2, %0|%0, %2}"
5060 [(set_attr "type" "sseiadd")
5061 (set_attr "prefix_data16" "1")
5062 (set_attr "mode" "TI")])
5063
;; movmskps: produce an SImode value in a general register from a V4SF SSE
;; register, modelled as UNSPEC_MOVMSK.
5064 (define_insn "sse_movmskps"
5065 [(set (match_operand:SI 0 "register_operand" "=r")
5066 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
5067 UNSPEC_MOVMSK))]
5068 "TARGET_SSE"
5069 "movmskps\t{%1, %0|%0, %1}"
5070 [(set_attr "type" "ssecvt")
5071 (set_attr "mode" "V4SF")])
5072
;; movmskpd: produce an SImode value in a general register from a V2DF SSE
;; register, modelled as UNSPEC_MOVMSK.
5073 (define_insn "sse2_movmskpd"
5074 [(set (match_operand:SI 0 "register_operand" "=r")
5075 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
5076 UNSPEC_MOVMSK))]
5077 "TARGET_SSE2"
5078 "movmskpd\t{%1, %0|%0, %1}"
5079 [(set_attr "type" "ssecvt")
5080 (set_attr "mode" "V2DF")])
5081
;; pmovmskb: produce an SImode value in a general register from a V16QI SSE
;; register, modelled as UNSPEC_MOVMSK.
5082 (define_insn "sse2_pmovmskb"
5083 [(set (match_operand:SI 0 "register_operand" "=r")
5084 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5085 UNSPEC_MOVMSK))]
5086 "TARGET_SSE2"
5087 "pmovmskb\t{%1, %0|%0, %1}"
5088 [(set_attr "type" "ssecvt")
5089 (set_attr "prefix_data16" "1")
5090 (set_attr "mode" "SI")])
5091
;; Expander for maskmovdqu.  The V16QI memory destination (operand 0) also
;; appears as an input of the UNSPEC_MASKMOV, alongside the two V16QI
;; register operands.  The preparation statement is empty, so the pattern is
;; emitted as written for the *sse2_maskmovdqu insns to match.
5092 (define_expand "sse2_maskmovdqu"
5093 [(set (match_operand:V16QI 0 "memory_operand" "")
5094 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5095 (match_operand:V16QI 2 "register_operand" "x")
5096 (match_dup 0)]
5097 UNSPEC_MASKMOV))]
5098 "TARGET_SSE2"
5099 "")
5100
;; 32-bit maskmovdqu.  The store address is an SImode register restricted to
;; constraint 'D'; it is not printed in the template, which names only
;; operands 1 and 2.  The old memory contents are an input of the unspec.
5101 (define_insn "*sse2_maskmovdqu"
5102 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5103 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5104 (match_operand:V16QI 2 "register_operand" "x")
5105 (mem:V16QI (match_dup 0))]
5106 UNSPEC_MASKMOV))]
5107 "TARGET_SSE2 && !TARGET_64BIT"
5108 ;; @@@ check ordering of operands in intel/nonintel syntax
5109 "maskmovdqu\t{%2, %1|%1, %2}"
5110 [(set_attr "type" "ssecvt")
5111 (set_attr "prefix_data16" "1")
5112 (set_attr "mode" "TI")])
5113
;; 64-bit maskmovdqu.  Identical to the 32-bit insn except that the store
;; address is a DImode register (still constraint 'D'); it is not printed in
;; the template.  The old memory contents are an input of the unspec.
5114 (define_insn "*sse2_maskmovdqu_rex64"
5115 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5116 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5117 (match_operand:V16QI 2 "register_operand" "x")
5118 (mem:V16QI (match_dup 0))]
5119 UNSPEC_MASKMOV))]
5120 "TARGET_SSE2 && TARGET_64BIT"
5121 ;; @@@ check ordering of operands in intel/nonintel syntax
5122 "maskmovdqu\t{%2, %1|%1, %2}"
5123 [(set_attr "type" "ssecvt")
5124 (set_attr "prefix_data16" "1")
5125 (set_attr "mode" "TI")])
5126
;; ldmxcsr from an SImode memory operand.  Modelled as an unspec_volatile
;; (UNSPECV_LDMXCSR) with no set destination; the memory attribute is 'load'.
5127 (define_insn "sse_ldmxcsr"
5128 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5129 UNSPECV_LDMXCSR)]
5130 "TARGET_SSE"
5131 "ldmxcsr\t%0"
5132 [(set_attr "type" "sse")
5133 (set_attr "memory" "load")])
5134
;; stmxcsr into an SImode memory operand.  The stored value is modelled as
;; an unspec_volatile (UNSPECV_STMXCSR) with a dummy (const_int 0) input;
;; the memory attribute is 'store'.
5135 (define_insn "sse_stmxcsr"
5136 [(set (match_operand:SI 0 "memory_operand" "=m")
5137 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5138 "TARGET_SSE"
5139 "stmxcsr\t%0"
5140 [(set_attr "type" "sse")
5141 (set_attr "memory" "store")])
5142
;; Expander for sfence.  Operand 0 is not supplied by the caller: the
;; preparation code creates a volatile BLKmode MEM whose address is a
;; SCRATCH, and the pattern sets that MEM from an UNSPEC_SFENCE of itself.
5143 (define_expand "sse_sfence"
5144 [(set (match_dup 0)
5145 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5146 "TARGET_SSE || TARGET_3DNOW_A"
5147 {
5148 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5149 MEM_VOLATILE_P (operands[0]) = 1;
5150 })
5151
;; Matching insn for the sfence expander: a BLKmode operand set from an
;; UNSPEC_SFENCE of itself.  Emits 'sfence'; the memory attribute is
;; 'unknown'.
5152 (define_insn "*sse_sfence"
5153 [(set (match_operand:BLK 0 "" "")
5154 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5155 "TARGET_SSE || TARGET_3DNOW_A"
5156 "sfence"
5157 [(set_attr "type" "sse")
5158 (set_attr "memory" "unknown")])
5159
;; clflush of the address given by operand 0 (an address_operand, printed
;; with the %a modifier).  Modelled as an unspec_volatile (UNSPECV_CLFLUSH);
;; the memory attribute is 'unknown'.
5160 (define_insn "sse2_clflush"
5161 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5162 UNSPECV_CLFLUSH)]
5163 "TARGET_SSE2"
5164 "clflush\t%a0"
5165 [(set_attr "type" "sse")
5166 (set_attr "memory" "unknown")])
5167
;; Expander for mfence.  Operand 0 is not supplied by the caller: the
;; preparation code creates a volatile BLKmode MEM whose address is a
;; SCRATCH, and the pattern sets that MEM from an UNSPEC_MFENCE of itself.
5168 (define_expand "sse2_mfence"
5169 [(set (match_dup 0)
5170 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5171 "TARGET_SSE2"
5172 {
5173 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5174 MEM_VOLATILE_P (operands[0]) = 1;
5175 })
5176
;; Matching insn for the mfence expander: a BLKmode operand set from an
;; UNSPEC_MFENCE of itself.  Emits 'mfence'; the memory attribute is
;; 'unknown'.
5177 (define_insn "*sse2_mfence"
5178 [(set (match_operand:BLK 0 "" "")
5179 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5180 "TARGET_SSE2"
5181 "mfence"
5182 [(set_attr "type" "sse")
5183 (set_attr "memory" "unknown")])
5184
;; Expander for lfence.  Operand 0 is not supplied by the caller: the
;; preparation code creates a volatile BLKmode MEM whose address is a
;; SCRATCH, and the pattern sets that MEM from an UNSPEC_LFENCE of itself.
5185 (define_expand "sse2_lfence"
5186 [(set (match_dup 0)
5187 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5188 "TARGET_SSE2"
5189 {
5190 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5191 MEM_VOLATILE_P (operands[0]) = 1;
5192 })
5193
;; Matching insn for the lfence expander: a BLKmode operand set from an
;; UNSPEC_LFENCE of itself.  Emits 'lfence'; the memory attribute is
;; 'unknown'.
5194 (define_insn "*sse2_lfence"
5195 [(set (match_operand:BLK 0 "" "")
5196 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5197 "TARGET_SSE2"
5198 "lfence"
5199 [(set_attr "type" "sse")
5200 (set_attr "memory" "unknown")])
5201
;; mwait.  The two SImode inputs are pinned to register classes 'a' and 'c'
;; and are implicit in the instruction, so the template prints no operands.
;; The same pattern serves 32-bit and 64-bit targets (see the comment inside
;; the pattern).  Length is fixed at 3 bytes.
5202 (define_insn "sse3_mwait"
5203 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5204 (match_operand:SI 1 "register_operand" "c")]
5205 UNSPECV_MWAIT)]
5206 "TARGET_SSE3"
5207 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5208 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5209 ;; we only need to set up 32bit registers.
5210 "mwait"
5211 [(set_attr "length" "3")])
5212
;; 32-bit monitor.  The three SImode inputs are pinned to register classes
;; 'a', 'c' and 'd' and are printed explicitly in the template.  Length is
;; fixed at 3 bytes.
5213 (define_insn "sse3_monitor"
5214 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5215 (match_operand:SI 1 "register_operand" "c")
5216 (match_operand:SI 2 "register_operand" "d")]
5217 UNSPECV_MONITOR)]
5218 "TARGET_SSE3 && !TARGET_64BIT"
5219 "monitor\t%0, %1, %2"
5220 [(set_attr "length" "3")])
5221
;; 64-bit monitor.  Operand 0 (class 'a') is DImode while operands 1 and 2
;; (classes 'c' and 'd') stay SImode; the template prints no operands.
;; Length is fixed at 3 bytes.
5222 (define_insn "sse3_monitor64"
5223 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5224 (match_operand:SI 1 "register_operand" "c")
5225 (match_operand:SI 2 "register_operand" "d")]
5226 UNSPECV_MONITOR)]
5227 "TARGET_SSE3 && TARGET_64BIT"
5228 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5229 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5230 ;; zero extended to 64bit, we only need to set up 32bit registers.
5231 "monitor"
5232 [(set_attr "length" "3")])
5233
5234 ;; SSSE3
;; phaddw on SSE registers: horizontal add of adjacent HImode pairs.
;; Result elements 0-3 are the pair sums (0+1, 2+3, 4+5, 6+7) of operand 1,
;; which is tied to the destination; elements 4-7 are the same pair sums of
;; operand 2 (register or memory).
5235 (define_insn "ssse3_phaddwv8hi3"
5236 [(set (match_operand:V8HI 0 "register_operand" "=x")
5237 (vec_concat:V8HI
5238 (vec_concat:V4HI
5239 (vec_concat:V2HI
5240 (plus:HI
5241 (vec_select:HI
5242 (match_operand:V8HI 1 "register_operand" "0")
5243 (parallel [(const_int 0)]))
5244 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5245 (plus:HI
5246 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5247 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5248 (vec_concat:V2HI
5249 (plus:HI
5250 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5251 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5252 (plus:HI
5253 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5254 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5255 (vec_concat:V4HI
5256 (vec_concat:V2HI
5257 (plus:HI
5258 (vec_select:HI
5259 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5260 (parallel [(const_int 0)]))
5261 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5262 (plus:HI
5263 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5264 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5265 (vec_concat:V2HI
5266 (plus:HI
5267 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5268 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5269 (plus:HI
5270 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5271 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5272 "TARGET_SSSE3"
5273 "phaddw\t{%2, %0|%0, %2}"
5274 [(set_attr "type" "sseiadd")
5275 (set_attr "prefix_data16" "1")
5276 (set_attr "prefix_extra" "1")
5277 (set_attr "mode" "TI")])
5278
;; phaddw on MMX registers (constraint 'y'): horizontal add of adjacent
;; HImode pairs.  Result elements 0-1 are the pair sums (0+1, 2+3) of
;; operand 1, which is tied to the destination; elements 2-3 are the same
;; pair sums of operand 2 (register or memory).
5279 (define_insn "ssse3_phaddwv4hi3"
5280 [(set (match_operand:V4HI 0 "register_operand" "=y")
5281 (vec_concat:V4HI
5282 (vec_concat:V2HI
5283 (plus:HI
5284 (vec_select:HI
5285 (match_operand:V4HI 1 "register_operand" "0")
5286 (parallel [(const_int 0)]))
5287 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5288 (plus:HI
5289 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5290 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5291 (vec_concat:V2HI
5292 (plus:HI
5293 (vec_select:HI
5294 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5295 (parallel [(const_int 0)]))
5296 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5297 (plus:HI
5298 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5299 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5300 "TARGET_SSSE3"
5301 "phaddw\t{%2, %0|%0, %2}"
5302 [(set_attr "type" "sseiadd")
5303 (set_attr "prefix_extra" "1")
5304 (set_attr "mode" "DI")])
5305
;; phaddd on SSE registers: horizontal add of adjacent SImode pairs.
;; Result elements 0-1 are the pair sums (0+1, 2+3) of operand 1, which is
;; tied to the destination; elements 2-3 are the same pair sums of operand 2
;; (register or memory).
5306 (define_insn "ssse3_phadddv4si3"
5307 [(set (match_operand:V4SI 0 "register_operand" "=x")
5308 (vec_concat:V4SI
5309 (vec_concat:V2SI
5310 (plus:SI
5311 (vec_select:SI
5312 (match_operand:V4SI 1 "register_operand" "0")
5313 (parallel [(const_int 0)]))
5314 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5315 (plus:SI
5316 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5317 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5318 (vec_concat:V2SI
5319 (plus:SI
5320 (vec_select:SI
5321 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5322 (parallel [(const_int 0)]))
5323 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5324 (plus:SI
5325 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5326 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5327 "TARGET_SSSE3"
5328 "phaddd\t{%2, %0|%0, %2}"
5329 [(set_attr "type" "sseiadd")
5330 (set_attr "prefix_data16" "1")
5331 (set_attr "prefix_extra" "1")
5332 (set_attr "mode" "TI")])
5333
;; phaddd on MMX registers (constraint 'y').  Result element 0 is the sum of
;; both elements of operand 1, which is tied to the destination; element 1
;; is the sum of both elements of operand 2 (register or memory).
5334 (define_insn "ssse3_phadddv2si3"
5335 [(set (match_operand:V2SI 0 "register_operand" "=y")
5336 (vec_concat:V2SI
5337 (plus:SI
5338 (vec_select:SI
5339 (match_operand:V2SI 1 "register_operand" "0")
5340 (parallel [(const_int 0)]))
5341 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5342 (plus:SI
5343 (vec_select:SI
5344 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5345 (parallel [(const_int 0)]))
5346 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5347 "TARGET_SSSE3"
5348 "phaddd\t{%2, %0|%0, %2}"
5349 [(set_attr "type" "sseiadd")
5350 (set_attr "prefix_extra" "1")
5351 (set_attr "mode" "DI")])
5352
;; phaddsw on SSE registers: horizontal signed-saturating add (ss_plus) of
;; adjacent HImode pairs.  Result elements 0-3 are the pair sums (0+1, 2+3,
;; 4+5, 6+7) of operand 1, which is tied to the destination; elements 4-7
;; are the same pair sums of operand 2 (register or memory).
5353 (define_insn "ssse3_phaddswv8hi3"
5354 [(set (match_operand:V8HI 0 "register_operand" "=x")
5355 (vec_concat:V8HI
5356 (vec_concat:V4HI
5357 (vec_concat:V2HI
5358 (ss_plus:HI
5359 (vec_select:HI
5360 (match_operand:V8HI 1 "register_operand" "0")
5361 (parallel [(const_int 0)]))
5362 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5363 (ss_plus:HI
5364 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5365 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5366 (vec_concat:V2HI
5367 (ss_plus:HI
5368 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5369 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5370 (ss_plus:HI
5371 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5372 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5373 (vec_concat:V4HI
5374 (vec_concat:V2HI
5375 (ss_plus:HI
5376 (vec_select:HI
5377 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5378 (parallel [(const_int 0)]))
5379 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5380 (ss_plus:HI
5381 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5382 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5383 (vec_concat:V2HI
5384 (ss_plus:HI
5385 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5386 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5387 (ss_plus:HI
5388 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5389 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5390 "TARGET_SSSE3"
5391 "phaddsw\t{%2, %0|%0, %2}"
5392 [(set_attr "type" "sseiadd")
5393 (set_attr "prefix_data16" "1")
5394 (set_attr "prefix_extra" "1")
5395 (set_attr "mode" "TI")])
5396
;; phaddsw on MMX registers (constraint 'y'): horizontal signed-saturating
;; add (ss_plus) of adjacent HImode pairs.  Result elements 0-1 are the pair
;; sums (0+1, 2+3) of operand 1, which is tied to the destination; elements
;; 2-3 are the same pair sums of operand 2 (register or memory).
5397 (define_insn "ssse3_phaddswv4hi3"
5398 [(set (match_operand:V4HI 0 "register_operand" "=y")
5399 (vec_concat:V4HI
5400 (vec_concat:V2HI
5401 (ss_plus:HI
5402 (vec_select:HI
5403 (match_operand:V4HI 1 "register_operand" "0")
5404 (parallel [(const_int 0)]))
5405 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5406 (ss_plus:HI
5407 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5408 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5409 (vec_concat:V2HI
5410 (ss_plus:HI
5411 (vec_select:HI
5412 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5413 (parallel [(const_int 0)]))
5414 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5415 (ss_plus:HI
5416 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5417 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5418 "TARGET_SSSE3"
5419 "phaddsw\t{%2, %0|%0, %2}"
5420 [(set_attr "type" "sseiadd")
5421 (set_attr "prefix_extra" "1")
5422 (set_attr "mode" "DI")])
5423
;; phsubw on SSE registers: horizontal subtract of adjacent HImode pairs
;; (even element minus the following odd element).  Result elements 0-3 are
;; the pair differences (0-1, 2-3, 4-5, 6-7) of operand 1, which is tied to
;; the destination; elements 4-7 are the same differences of operand 2.
5424 (define_insn "ssse3_phsubwv8hi3"
5425 [(set (match_operand:V8HI 0 "register_operand" "=x")
5426 (vec_concat:V8HI
5427 (vec_concat:V4HI
5428 (vec_concat:V2HI
5429 (minus:HI
5430 (vec_select:HI
5431 (match_operand:V8HI 1 "register_operand" "0")
5432 (parallel [(const_int 0)]))
5433 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5434 (minus:HI
5435 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5436 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5437 (vec_concat:V2HI
5438 (minus:HI
5439 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5440 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5441 (minus:HI
5442 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5443 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5444 (vec_concat:V4HI
5445 (vec_concat:V2HI
5446 (minus:HI
5447 (vec_select:HI
5448 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5449 (parallel [(const_int 0)]))
5450 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5451 (minus:HI
5452 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5453 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5454 (vec_concat:V2HI
5455 (minus:HI
5456 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5457 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5458 (minus:HI
5459 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5460 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5461 "TARGET_SSSE3"
5462 "phsubw\t{%2, %0|%0, %2}"
5463 [(set_attr "type" "sseiadd")
5464 (set_attr "prefix_data16" "1")
5465 (set_attr "prefix_extra" "1")
5466 (set_attr "mode" "TI")])
5467
;; phsubw on MMX registers (constraint 'y'): horizontal subtract of adjacent
;; HImode pairs (even element minus the following odd element).  Result
;; elements 0-1 are the pair differences (0-1, 2-3) of operand 1, which is
;; tied to the destination; elements 2-3 are the same differences of
;; operand 2 (register or memory).
5468 (define_insn "ssse3_phsubwv4hi3"
5469 [(set (match_operand:V4HI 0 "register_operand" "=y")
5470 (vec_concat:V4HI
5471 (vec_concat:V2HI
5472 (minus:HI
5473 (vec_select:HI
5474 (match_operand:V4HI 1 "register_operand" "0")
5475 (parallel [(const_int 0)]))
5476 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5477 (minus:HI
5478 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5479 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5480 (vec_concat:V2HI
5481 (minus:HI
5482 (vec_select:HI
5483 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5484 (parallel [(const_int 0)]))
5485 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5486 (minus:HI
5487 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5488 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5489 "TARGET_SSSE3"
5490 "phsubw\t{%2, %0|%0, %2}"
5491 [(set_attr "type" "sseiadd")
5492 (set_attr "prefix_extra" "1")
5493 (set_attr "mode" "DI")])
5494
;; phsubd on SSE registers: horizontal subtract of adjacent SImode pairs
;; (even element minus the following odd element).  Result elements 0-1 are
;; the pair differences (0-1, 2-3) of operand 1, which is tied to the
;; destination; elements 2-3 are the same differences of operand 2.
5495 (define_insn "ssse3_phsubdv4si3"
5496 [(set (match_operand:V4SI 0 "register_operand" "=x")
5497 (vec_concat:V4SI
5498 (vec_concat:V2SI
5499 (minus:SI
5500 (vec_select:SI
5501 (match_operand:V4SI 1 "register_operand" "0")
5502 (parallel [(const_int 0)]))
5503 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5504 (minus:SI
5505 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5506 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5507 (vec_concat:V2SI
5508 (minus:SI
5509 (vec_select:SI
5510 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5511 (parallel [(const_int 0)]))
5512 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5513 (minus:SI
5514 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5515 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5516 "TARGET_SSSE3"
5517 "phsubd\t{%2, %0|%0, %2}"
5518 [(set_attr "type" "sseiadd")
5519 (set_attr "prefix_data16" "1")
5520 (set_attr "prefix_extra" "1")
5521 (set_attr "mode" "TI")])
5522
;; phsubd on MMX registers (constraint 'y').  Result element 0 is element 0
;; minus element 1 of operand 1, which is tied to the destination; result
;; element 1 is element 0 minus element 1 of operand 2 (register or memory).
5523 (define_insn "ssse3_phsubdv2si3"
5524 [(set (match_operand:V2SI 0 "register_operand" "=y")
5525 (vec_concat:V2SI
5526 (minus:SI
5527 (vec_select:SI
5528 (match_operand:V2SI 1 "register_operand" "0")
5529 (parallel [(const_int 0)]))
5530 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5531 (minus:SI
5532 (vec_select:SI
5533 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5534 (parallel [(const_int 0)]))
5535 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5536 "TARGET_SSSE3"
5537 "phsubd\t{%2, %0|%0, %2}"
5538 [(set_attr "type" "sseiadd")
5539 (set_attr "prefix_extra" "1")
5540 (set_attr "mode" "DI")])
5541
;; phsubsw on SSE registers: horizontal signed-saturating subtract
;; (ss_minus) of adjacent HImode pairs (even element minus the following odd
;; element).  Result elements 0-3 come from operand 1, which is tied to the
;; destination; elements 4-7 come from operand 2 (register or memory).
5542 (define_insn "ssse3_phsubswv8hi3"
5543 [(set (match_operand:V8HI 0 "register_operand" "=x")
5544 (vec_concat:V8HI
5545 (vec_concat:V4HI
5546 (vec_concat:V2HI
5547 (ss_minus:HI
5548 (vec_select:HI
5549 (match_operand:V8HI 1 "register_operand" "0")
5550 (parallel [(const_int 0)]))
5551 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5552 (ss_minus:HI
5553 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5554 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5555 (vec_concat:V2HI
5556 (ss_minus:HI
5557 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5558 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5559 (ss_minus:HI
5560 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5561 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5562 (vec_concat:V4HI
5563 (vec_concat:V2HI
5564 (ss_minus:HI
5565 (vec_select:HI
5566 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5567 (parallel [(const_int 0)]))
5568 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5569 (ss_minus:HI
5570 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5571 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5572 (vec_concat:V2HI
5573 (ss_minus:HI
5574 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5575 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5576 (ss_minus:HI
5577 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5578 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5579 "TARGET_SSSE3"
5580 "phsubsw\t{%2, %0|%0, %2}"
5581 [(set_attr "type" "sseiadd")
5582 (set_attr "prefix_data16" "1")
5583 (set_attr "prefix_extra" "1")
5584 (set_attr "mode" "TI")])
5585
;; phsubsw, 64-bit form (constraint "y"; insn mode DI).
;; Per the RTL: results 0-1 are the saturating differences (ss_minus) of
;; element pairs (0,1) and (2,3) of operand 1; results 2-3 are the same
;; pairs taken from operand 2.  Operand 1 is tied to the destination.
5586 (define_insn "ssse3_phsubswv4hi3"
5587 [(set (match_operand:V4HI 0 "register_operand" "=y")
5588 (vec_concat:V4HI
5589 (vec_concat:V2HI
5590 (ss_minus:HI
5591 (vec_select:HI
5592 (match_operand:V4HI 1 "register_operand" "0")
5593 (parallel [(const_int 0)]))
5594 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5595 (ss_minus:HI
5596 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5597 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5598 (vec_concat:V2HI
5599 (ss_minus:HI
5600 (vec_select:HI
5601 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5602 (parallel [(const_int 0)]))
5603 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5604 (ss_minus:HI
5605 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5606 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5607 "TARGET_SSSE3"
5608 "phsubsw\t{%2, %0|%0, %2}"
5609 [(set_attr "type" "sseiadd")
5610 (set_attr "prefix_extra" "1")
5611 (set_attr "mode" "DI")])
5612
;; pmaddubsw, 128-bit form (constraint "x"; insn mode TI).
;; Per the RTL: the even-indexed bytes of operand 1 (zero-extended) are
;; multiplied by the even-indexed bytes of operand 2 (sign-extended), the
;; odd-indexed bytes likewise, and the two V8HI products are added with
;; signed saturation (ss_plus).
;; Each vec_select picks eight QI elements, so its mode is V8QI.
;; Operand 1 must NOT be marked commutative: it is zero-extended while
;; operand 2 is sign-extended, so exchanging them changes the value.  It is
;; therefore a plain register tied to the destination.
5613 (define_insn "ssse3_pmaddubswv8hi3"
5614 [(set (match_operand:V8HI 0 "register_operand" "=x")
5615 (ss_plus:V8HI
5616 (mult:V8HI
5617 (zero_extend:V8HI
5618 (vec_select:V8QI
5619 (match_operand:V16QI 1 "register_operand" "0")
5620 (parallel [(const_int 0)
5621 (const_int 2)
5622 (const_int 4)
5623 (const_int 6)
5624 (const_int 8)
5625 (const_int 10)
5626 (const_int 12)
5627 (const_int 14)])))
5628 (sign_extend:V8HI
5629 (vec_select:V8QI
5630 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
5631 (parallel [(const_int 0)
5632 (const_int 2)
5633 (const_int 4)
5634 (const_int 6)
5635 (const_int 8)
5636 (const_int 10)
5637 (const_int 12)
5638 (const_int 14)]))))
5639 (mult:V8HI
5640 (zero_extend:V8HI
5641 (vec_select:V8QI (match_dup 1)
5642 (parallel [(const_int 1)
5643 (const_int 3)
5644 (const_int 5)
5645 (const_int 7)
5646 (const_int 9)
5647 (const_int 11)
5648 (const_int 13)
5649 (const_int 15)])))
5650 (sign_extend:V8HI
5651 (vec_select:V8QI (match_dup 2)
5652 (parallel [(const_int 1)
5653 (const_int 3)
5654 (const_int 5)
5655 (const_int 7)
5656 (const_int 9)
5657 (const_int 11)
5658 (const_int 13)
5659 (const_int 15)]))))))]
5660 "TARGET_SSSE3"
5661 "pmaddubsw\t{%2, %0|%0, %2}"
5662 [(set_attr "type" "sseiadd")
5663 (set_attr "prefix_data16" "1")
5664 (set_attr "prefix_extra" "1")
5665 (set_attr "mode" "TI")])
5666
;; pmaddubsw, 64-bit form (constraint "y"; insn mode DI).
;; Per the RTL: the even-indexed bytes of operand 1 (zero-extended) are
;; multiplied by the even-indexed bytes of operand 2 (sign-extended), the
;; odd-indexed bytes likewise, and the two V4HI products are added with
;; signed saturation (ss_plus).
;; Each vec_select picks four QI elements, so its mode is V4QI.
;; Operand 1 must NOT be marked commutative: it is zero-extended while
;; operand 2 is sign-extended, so exchanging them changes the value.  It is
;; therefore a plain register tied to the destination.
5667 (define_insn "ssse3_pmaddubswv4hi3"
5668 [(set (match_operand:V4HI 0 "register_operand" "=y")
5669 (ss_plus:V4HI
5670 (mult:V4HI
5671 (zero_extend:V4HI
5672 (vec_select:V4QI
5673 (match_operand:V8QI 1 "register_operand" "0")
5674 (parallel [(const_int 0)
5675 (const_int 2)
5676 (const_int 4)
5677 (const_int 6)])))
5678 (sign_extend:V4HI
5679 (vec_select:V4QI
5680 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
5681 (parallel [(const_int 0)
5682 (const_int 2)
5683 (const_int 4)
5684 (const_int 6)]))))
5685 (mult:V4HI
5686 (zero_extend:V4HI
5687 (vec_select:V4QI (match_dup 1)
5688 (parallel [(const_int 1)
5689 (const_int 3)
5690 (const_int 5)
5691 (const_int 7)])))
5692 (sign_extend:V4HI
5693 (vec_select:V4QI (match_dup 2)
5694 (parallel [(const_int 1)
5695 (const_int 3)
5696 (const_int 5)
5697 (const_int 7)]))))))]
5698 "TARGET_SSSE3"
5699 "pmaddubsw\t{%2, %0|%0, %2}"
5700 [(set_attr "type" "sseiadd")
5701 (set_attr "prefix_extra" "1")
5702 (set_attr "mode" "DI")])
5703
;; pmulhrsw, 128-bit form (constraint "x"; insn mode TI).
;; Per the RTL: both operands are sign-extended to SI elements and
;; multiplied; the product is shifted right by 14, a vector of ones is
;; added, the sum is shifted right by 1 and truncated back to V8HI.
;; Operands 1 and 2 are marked commutative ("%"); the insn condition uses
;; ix86_binary_operator_ok to validate the operand combination.
;; NOTE(review): the rounding constant is written as const_vector:V8HI
;; although it is an operand of plus:V8SI -- confirm the mode mismatch is
;; intentional.
5704 (define_insn "ssse3_pmulhrswv8hi3"
5705 [(set (match_operand:V8HI 0 "register_operand" "=x")
5706 (truncate:V8HI
5707 (lshiftrt:V8SI
5708 (plus:V8SI
5709 (lshiftrt:V8SI
5710 (mult:V8SI
5711 (sign_extend:V8SI
5712 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5713 (sign_extend:V8SI
5714 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5715 (const_int 14))
5716 (const_vector:V8HI [(const_int 1) (const_int 1)
5717 (const_int 1) (const_int 1)
5718 (const_int 1) (const_int 1)
5719 (const_int 1) (const_int 1)]))
5720 (const_int 1))))]
5721 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5722 "pmulhrsw\t{%2, %0|%0, %2}"
5723 [(set_attr "type" "sseimul")
5724 (set_attr "prefix_data16" "1")
5725 (set_attr "prefix_extra" "1")
5726 (set_attr "mode" "TI")])
5727
;; pmulhrsw, 64-bit form (constraint "y"; insn mode DI).
;; Per the RTL: both operands are sign-extended to SI elements and
;; multiplied; the product is shifted right by 14, a vector of ones is
;; added, the sum is shifted right by 1 and truncated back to V4HI.
;; Operands 1 and 2 are marked commutative ("%"); the insn condition uses
;; ix86_binary_operator_ok to validate the operand combination.
;; NOTE(review): the rounding constant is written as const_vector:V4HI
;; although it is an operand of plus:V4SI -- confirm the mode mismatch is
;; intentional.
5728 (define_insn "ssse3_pmulhrswv4hi3"
5729 [(set (match_operand:V4HI 0 "register_operand" "=y")
5730 (truncate:V4HI
5731 (lshiftrt:V4SI
5732 (plus:V4SI
5733 (lshiftrt:V4SI
5734 (mult:V4SI
5735 (sign_extend:V4SI
5736 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
5737 (sign_extend:V4SI
5738 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
5739 (const_int 14))
5740 (const_vector:V4HI [(const_int 1) (const_int 1)
5741 (const_int 1) (const_int 1)]))
5742 (const_int 1))))]
5743 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
5744 "pmulhrsw\t{%2, %0|%0, %2}"
5745 [(set_attr "type" "sseimul")
5746 (set_attr "prefix_extra" "1")
5747 (set_attr "mode" "DI")])
5748
;; pshufb, 128-bit form (constraint "x"; insn mode TI).
;; The operation is modelled as an opaque UNSPEC_PSHUFB of operand 1 (tied
;; to the destination) and operand 2 (register or memory); the RTL does
;; not describe the shuffle itself.
;; The output template carries no trailing ';' (it would only open an
;; empty md comment).
5749 (define_insn "ssse3_pshufbv16qi3"
5750 [(set (match_operand:V16QI 0 "register_operand" "=x")
5751 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5752 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5753 UNSPEC_PSHUFB))]
5754 "TARGET_SSSE3"
5755 "pshufb\t{%2, %0|%0, %2}"
5756 [(set_attr "type" "sselog1")
5757 (set_attr "prefix_data16" "1")
5758 (set_attr "prefix_extra" "1")
5759 (set_attr "mode" "TI")])
5760
;; pshufb, 64-bit form (constraint "y"; insn mode DI).
;; The operation is modelled as an opaque UNSPEC_PSHUFB of operand 1 (tied
;; to the destination) and operand 2 (register or memory).
;; The output template carries no trailing ';' (it would only open an
;; empty md comment).
5761 (define_insn "ssse3_pshufbv8qi3"
5762 [(set (match_operand:V8QI 0 "register_operand" "=y")
5763 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
5764 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
5765 UNSPEC_PSHUFB))]
5766 "TARGET_SSSE3"
5767 "pshufb\t{%2, %0|%0, %2}"
5768 [(set_attr "type" "sselog1")
5769 (set_attr "prefix_extra" "1")
5770 (set_attr "mode" "DI")])
5771
;; psign{b,w,d}, 128-bit forms: one insn per mode in SSEMODE124
;; (V16QI, V8HI, V4SI), with the mnemonic suffix taken from <ssevecsize>.
;; The operation is modelled as an opaque UNSPEC_PSIGN of operand 1 (tied
;; to the destination) and operand 2 (register or memory).
;; The output template carries no trailing ';' (it would only open an
;; empty md comment).
5772 (define_insn "ssse3_psign<mode>3"
5773 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5774 (unspec:SSEMODE124 [(match_operand:SSEMODE124 1 "register_operand" "0")
5775 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
5776 UNSPEC_PSIGN))]
5777 "TARGET_SSSE3"
5778 "psign<ssevecsize>\t{%2, %0|%0, %2}"
5779 [(set_attr "type" "sselog1")
5780 (set_attr "prefix_data16" "1")
5781 (set_attr "prefix_extra" "1")
5782 (set_attr "mode" "TI")])
5783
;; psign, 64-bit forms: one insn per mode in MMXMODEI (constraint "y";
;; insn mode DI), with the mnemonic suffix taken from <mmxvecsize>.
;; The operation is modelled as an opaque UNSPEC_PSIGN of operand 1 (tied
;; to the destination) and operand 2 (register or memory).
;; The output template carries no trailing ';' (it would only open an
;; empty md comment).
5784 (define_insn "ssse3_psign<mode>3"
5785 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5786 (unspec:MMXMODEI [(match_operand:MMXMODEI 1 "register_operand" "0")
5787 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
5788 UNSPEC_PSIGN))]
5789 "TARGET_SSSE3"
5790 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
5791 [(set_attr "type" "sselog1")
5792 (set_attr "prefix_extra" "1")
5793 (set_attr "mode" "DI")])
5794
;; palignr, 128-bit form on TImode values (constraint "x").
;; Modelled as an opaque UNSPEC_PALIGNR of operand 1 (tied to the
;; destination), operand 2 (register or memory) and operand 3.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the instruction's immediate.
5795 (define_insn "ssse3_palignrti"
5796 [(set (match_operand:TI 0 "register_operand" "=x")
5797 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
5798 (match_operand:TI 2 "nonimmediate_operand" "xm")
5799 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5800 UNSPEC_PALIGNR))]
5801 "TARGET_SSSE3"
5802 {
5803 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5804 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5805 }
5806 [(set_attr "type" "sseishft")
5807 (set_attr "prefix_data16" "1")
5808 (set_attr "prefix_extra" "1")
5809 (set_attr "mode" "TI")])
5810
;; palignr, 64-bit form on DImode values (constraint "y").
;; Modelled as an opaque UNSPEC_PALIGNR of operand 1 (tied to the
;; destination), operand 2 (register or memory) and operand 3.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the instruction's immediate.
5811 (define_insn "ssse3_palignrdi"
5812 [(set (match_operand:DI 0 "register_operand" "=y")
5813 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
5814 (match_operand:DI 2 "nonimmediate_operand" "ym")
5815 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
5816 UNSPEC_PALIGNR))]
5817 "TARGET_SSSE3"
5818 {
5819 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
5820 return "palignr\t{%3, %2, %0|%0, %2, %3}";
5821 }
5822 [(set_attr "type" "sseishft")
5823 (set_attr "prefix_extra" "1")
5824 (set_attr "mode" "DI")])
5825
;; Vector absolute value, 128-bit forms: one insn per mode in SSEMODE124
;; (V16QI, V8HI, V4SI), emitted as pabs{b,w,d} via <ssevecsize>.
;; The source (operand 1) may be a register or memory and is not tied to
;; the destination.  Enabled under TARGET_SSSE3.
;; The output template carries no trailing ';' (it would only open an
;; empty md comment).
5826 (define_insn "abs<mode>2"
5827 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5828 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
5829 "TARGET_SSSE3"
5830 "pabs<ssevecsize>\t{%1, %0|%0, %1}"
5831 [(set_attr "type" "sselog1")
5832 (set_attr "prefix_data16" "1")
5833 (set_attr "prefix_extra" "1")
5834 (set_attr "mode" "TI")])
5835
;; Vector absolute value, 64-bit forms: one insn per mode in MMXMODEI
;; (constraint "y"; insn mode DI), emitted as pabs with the suffix from
;; <mmxvecsize>.  The source may be a register or memory.
;; The output template carries no trailing ';' (it would only open an
;; empty md comment).
5836 (define_insn "abs<mode>2"
5837 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
5838 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
5839 "TARGET_SSSE3"
5840 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
5841 [(set_attr "type" "sselog1")
5842 (set_attr "prefix_extra" "1")
5843 (set_attr "mode" "DI")])
5844
5845 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5846 ;;
5847 ;; AMD SSE4A instructions
5848 ;;
5849 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5850
;; movntsd from a vector register: stores element 0 of the V2DF operand 1
;; to the DF memory operand 0, wrapped in UNSPEC_MOVNT.
;; Enabled under TARGET_SSE4A.
5851 (define_insn "sse4a_vmmovntv2df"
5852 [(set (match_operand:DF 0 "memory_operand" "=m")
5853 (unspec:DF [(vec_select:DF
5854 (match_operand:V2DF 1 "register_operand" "x")
5855 (parallel [(const_int 0)]))]
5856 UNSPEC_MOVNT))]
5857 "TARGET_SSE4A"
5858 "movntsd\t{%1, %0|%0, %1}"
5859 [(set_attr "type" "ssemov")
5860 (set_attr "mode" "DF")])
5861
;; movntsd from a scalar: stores the DF register operand 1 to the DF
;; memory operand 0, wrapped in UNSPEC_MOVNT.  Enabled under TARGET_SSE4A.
5862 (define_insn "sse4a_movntdf"
5863 [(set (match_operand:DF 0 "memory_operand" "=m")
5864 (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
5865 UNSPEC_MOVNT))]
5866 "TARGET_SSE4A"
5867 "movntsd\t{%1, %0|%0, %1}"
5868 [(set_attr "type" "ssemov")
5869 (set_attr "mode" "DF")])
5870
;; movntss from a vector register: stores element 0 of the V4SF operand 1
;; to the SF memory operand 0, wrapped in UNSPEC_MOVNT.
;; Enabled under TARGET_SSE4A.
5871 (define_insn "sse4a_vmmovntv4sf"
5872 [(set (match_operand:SF 0 "memory_operand" "=m")
5873 (unspec:SF [(vec_select:SF
5874 (match_operand:V4SF 1 "register_operand" "x")
5875 (parallel [(const_int 0)]))]
5876 UNSPEC_MOVNT))]
5877 "TARGET_SSE4A"
5878 "movntss\t{%1, %0|%0, %1}"
5879 [(set_attr "type" "ssemov")
5880 (set_attr "mode" "SF")])
5881
;; movntss from a scalar: stores the SF register operand 1 to the SF
;; memory operand 0, wrapped in UNSPEC_MOVNT.  Enabled under TARGET_SSE4A.
5882 (define_insn "sse4a_movntsf"
5883 [(set (match_operand:SF 0 "memory_operand" "=m")
5884 (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
5885 UNSPEC_MOVNT))]
5886 "TARGET_SSE4A"
5887 "movntss\t{%1, %0|%0, %1}"
5888 [(set_attr "type" "ssemov")
5889 (set_attr "mode" "SF")])
5890
;; extrq, immediate form: opaque UNSPEC_EXTRQI of operand 1 (tied to the
;; destination) and two modeless const_int operands (2 and 3) that are
;; printed as the instruction's two immediates.  Enabled under TARGET_SSE4A.
5891 (define_insn "sse4a_extrqi"
5892 [(set (match_operand:V2DI 0 "register_operand" "=x")
5893 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5894 (match_operand 2 "const_int_operand" "")
5895 (match_operand 3 "const_int_operand" "")]
5896 UNSPEC_EXTRQI))]
5897 "TARGET_SSE4A"
5898 "extrq\t{%3, %2, %0|%0, %2, %3}"
5899 [(set_attr "type" "sse")
5900 (set_attr "prefix_data16" "1")
5901 (set_attr "mode" "TI")])
5902
;; extrq, register form: opaque UNSPEC_EXTRQ of operand 1 (tied to the
;; destination) and a V16QI register operand 2.  Enabled under TARGET_SSE4A.
5903 (define_insn "sse4a_extrq"
5904 [(set (match_operand:V2DI 0 "register_operand" "=x")
5905 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5906 (match_operand:V16QI 2 "register_operand" "x")]
5907 UNSPEC_EXTRQ))]
5908 "TARGET_SSE4A"
5909 "extrq\t{%2, %0|%0, %2}"
5910 [(set_attr "type" "sse")
5911 (set_attr "prefix_data16" "1")
5912 (set_attr "mode" "TI")])
5913
;; insertq, immediate form: opaque UNSPEC_INSERTQI of operand 1 (tied to
;; the destination), a V2DI register operand 2 and two modeless const_int
;; operands (3 and 4) printed as immediates.  Enabled under TARGET_SSE4A.
5914 (define_insn "sse4a_insertqi"
5915 [(set (match_operand:V2DI 0 "register_operand" "=x")
5916 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5917 (match_operand:V2DI 2 "register_operand" "x")
5918 (match_operand 3 "const_int_operand" "")
5919 (match_operand 4 "const_int_operand" "")]
5920 UNSPEC_INSERTQI))]
5921 "TARGET_SSE4A"
5922 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
5923 [(set_attr "type" "sseins")
5924 (set_attr "prefix_rep" "1")
5925 (set_attr "mode" "TI")])
5926
;; insertq, register form: opaque UNSPEC_INSERTQ of operand 1 (tied to the
;; destination) and a V2DI register operand 2.  Enabled under TARGET_SSE4A.
5927 (define_insn "sse4a_insertq"
5928 [(set (match_operand:V2DI 0 "register_operand" "=x")
5929 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5930 (match_operand:V2DI 2 "register_operand" "x")]
5931 UNSPEC_INSERTQ))]
5932 "TARGET_SSE4A"
5933 "insertq\t{%2, %0|%0, %2}"
5934 [(set_attr "type" "sseins")
5935 (set_attr "prefix_rep" "1")
5936 (set_attr "mode" "TI")])
5937
5938 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5939 ;;
5940 ;; Intel SSE4.1 instructions
5941 ;;
5942 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5943
;; blendpd: vec_merge of operand 2 (register or memory) and operand 1
;; (tied to the destination) under the immediate mask operand 3, which
;; must satisfy const_0_to_3_operand.  Enabled under TARGET_SSE4_1.
5944 (define_insn "sse4_1_blendpd"
5945 [(set (match_operand:V2DF 0 "register_operand" "=x")
5946 (vec_merge:V2DF
5947 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
5948 (match_operand:V2DF 1 "register_operand" "0")
5949 (match_operand:SI 3 "const_0_to_3_operand" "n")))]
5950 "TARGET_SSE4_1"
5951 "blendpd\t{%3, %2, %0|%0, %2, %3}"
5952 [(set_attr "type" "ssemov")
5953 (set_attr "prefix_extra" "1")
5954 (set_attr "mode" "V2DF")])
5955
;; blendps: vec_merge of operand 2 (register or memory) and operand 1
;; (tied to the destination) under the immediate mask operand 3, which
;; must satisfy const_0_to_15_operand.  Enabled under TARGET_SSE4_1.
5956 (define_insn "sse4_1_blendps"
5957 [(set (match_operand:V4SF 0 "register_operand" "=x")
5958 (vec_merge:V4SF
5959 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
5960 (match_operand:V4SF 1 "register_operand" "0")
5961 (match_operand:SI 3 "const_0_to_15_operand" "n")))]
5962 "TARGET_SSE4_1"
5963 "blendps\t{%3, %2, %0|%0, %2, %3}"
5964 [(set_attr "type" "ssemov")
5965 (set_attr "prefix_extra" "1")
5966 (set_attr "mode" "V4SF")])
5967
;; blendvpd: opaque UNSPEC_BLENDV of operand 1 (tied to the destination),
;; operand 2 (register or memory) and a selector operand 3 that uses the
;; "Y0" constraint.  Operands 0-2 use the *_not_xmm0_operand predicates,
;; presumably because the selector occupies xmm0 -- verify against the
;; predicate definitions.
5968 (define_insn "sse4_1_blendvpd"
5969 [(set (match_operand:V2DF 0 "reg_not_xmm0_operand" "=x")
5970 (unspec:V2DF [(match_operand:V2DF 1 "reg_not_xmm0_operand" "0")
5971 (match_operand:V2DF 2 "nonimm_not_xmm0_operand" "xm")
5972 (match_operand:V2DF 3 "register_operand" "Y0")]
5973 UNSPEC_BLENDV))]
5974 "TARGET_SSE4_1"
5975 "blendvpd\t{%3, %2, %0|%0, %2, %3}"
5976 [(set_attr "type" "ssemov")
5977 (set_attr "prefix_extra" "1")
5978 (set_attr "mode" "V2DF")])
5979
;; blendvps: opaque UNSPEC_BLENDV of operand 1 (tied to the destination),
;; operand 2 (register or memory) and a selector operand 3 that uses the
;; "Y0" constraint.  Operands 0-2 use the *_not_xmm0_operand predicates,
;; presumably because the selector occupies xmm0 -- verify against the
;; predicate definitions.
5980 (define_insn "sse4_1_blendvps"
5981 [(set (match_operand:V4SF 0 "reg_not_xmm0_operand" "=x")
5982 (unspec:V4SF [(match_operand:V4SF 1 "reg_not_xmm0_operand" "0")
5983 (match_operand:V4SF 2 "nonimm_not_xmm0_operand" "xm")
5984 (match_operand:V4SF 3 "register_operand" "Y0")]
5985 UNSPEC_BLENDV))]
5986 "TARGET_SSE4_1"
5987 "blendvps\t{%3, %2, %0|%0, %2, %3}"
5988 [(set_attr "type" "ssemov")
5989 (set_attr "prefix_extra" "1")
5990 (set_attr "mode" "V4SF")])
5991
;; dppd: opaque UNSPEC_DP of operands 1 and 2 (marked commutative with
;; "%") plus an immediate operand 3 restricted by const_0_to_255_operand.
;; Enabled under TARGET_SSE4_1.
5992 (define_insn "sse4_1_dppd"
5993 [(set (match_operand:V2DF 0 "register_operand" "=x")
5994 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "%0")
5995 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
5996 (match_operand:SI 3 "const_0_to_255_operand" "n")]
5997 UNSPEC_DP))]
5998 "TARGET_SSE4_1"
5999 "dppd\t{%3, %2, %0|%0, %2, %3}"
6000 [(set_attr "type" "ssemul")
6001 (set_attr "prefix_extra" "1")
6002 (set_attr "mode" "V2DF")])
6003
;; dpps: opaque UNSPEC_DP of operands 1 and 2 (marked commutative with
;; "%") plus an immediate operand 3 restricted by const_0_to_255_operand.
;; Enabled under TARGET_SSE4_1.
6004 (define_insn "sse4_1_dpps"
6005 [(set (match_operand:V4SF 0 "register_operand" "=x")
6006 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "%0")
6007 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
6008 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6009 UNSPEC_DP))]
6010 "TARGET_SSE4_1"
6011 "dpps\t{%3, %2, %0|%0, %2, %3}"
6012 [(set_attr "type" "ssemul")
6013 (set_attr "prefix_extra" "1")
6014 (set_attr "mode" "V4SF")])
6015
;; movntdqa: loads a V2DI value from the memory operand 1 into register
;; operand 0, wrapped in UNSPEC_MOVNTDQA.  Enabled under TARGET_SSE4_1.
6016 (define_insn "sse4_1_movntdqa"
6017 [(set (match_operand:V2DI 0 "register_operand" "=x")
6018 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
6019 UNSPEC_MOVNTDQA))]
6020 "TARGET_SSE4_1"
6021 "movntdqa\t{%1, %0|%0, %1}"
6022 [(set_attr "type" "ssecvt")
6023 (set_attr "prefix_extra" "1")
6024 (set_attr "mode" "TI")])
6025
;; mpsadbw: opaque UNSPEC_MPSADBW of operand 1 (tied to the destination),
;; operand 2 (register or memory) and an immediate operand 3 restricted by
;; const_0_to_255_operand.  Enabled under TARGET_SSE4_1.
6026 (define_insn "sse4_1_mpsadbw"
6027 [(set (match_operand:V16QI 0 "register_operand" "=x")
6028 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
6029 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
6030 (match_operand:SI 3 "const_0_to_255_operand" "n")]
6031 UNSPEC_MPSADBW))]
6032 "TARGET_SSE4_1"
6033 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
6034 [(set_attr "type" "sselog1")
6035 (set_attr "prefix_extra" "1")
6036 (set_attr "mode" "TI")])
6037
;; packusdw: the low V4HI half of the result is the unsigned-saturating
;; truncation (us_truncate) of operand 1, the high half that of operand 2.
;; Operand 1 is tied to the destination; operand 2 may be memory.
6038 (define_insn "sse4_1_packusdw"
6039 [(set (match_operand:V8HI 0 "register_operand" "=x")
6040 (vec_concat:V8HI
6041 (us_truncate:V4HI
6042 (match_operand:V4SI 1 "register_operand" "0"))
6043 (us_truncate:V4HI
6044 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6045 "TARGET_SSE4_1"
6046 "packusdw\t{%2, %0|%0, %2}"
6047 [(set_attr "type" "sselog")
6048 (set_attr "prefix_extra" "1")
6049 (set_attr "mode" "TI")])
6050
;; pblendvb: opaque UNSPEC_BLENDV of operand 1 (tied to the destination),
;; operand 2 (register or memory) and a selector operand 3 that uses the
;; "Y0" constraint.  Operands 0-2 use the *_not_xmm0_operand predicates,
;; presumably because the selector occupies xmm0 -- verify against the
;; predicate definitions.
6051 (define_insn "sse4_1_pblendvb"
6052 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
6053 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
6054 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
6055 (match_operand:V16QI 3 "register_operand" "Y0")]
6056 UNSPEC_BLENDV))]
6057 "TARGET_SSE4_1"
6058 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
6059 [(set_attr "type" "ssemov")
6060 (set_attr "prefix_extra" "1")
6061 (set_attr "mode" "TI")])
6062
;; pblendw: vec_merge of operand 2 (register or memory) and operand 1
;; (tied to the destination) under the immediate mask operand 3, which
;; must satisfy const_0_to_255_operand.  Enabled under TARGET_SSE4_1.
6063 (define_insn "sse4_1_pblendw"
6064 [(set (match_operand:V8HI 0 "register_operand" "=x")
6065 (vec_merge:V8HI
6066 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
6067 (match_operand:V8HI 1 "register_operand" "0")
6068 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
6069 "TARGET_SSE4_1"
6070 "pblendw\t{%3, %2, %0|%0, %2, %3}"
6071 [(set_attr "type" "ssemov")
6072 (set_attr "prefix_extra" "1")
6073 (set_attr "mode" "TI")])
6074
;; phminposuw: opaque UNSPEC_PHMINPOSUW of the single V8HI source operand 1
;; (register or memory); the destination is not tied to the source.
6075 (define_insn "sse4_1_phminposuw"
6076 [(set (match_operand:V8HI 0 "register_operand" "=x")
6077 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
6078 UNSPEC_PHMINPOSUW))]
6079 "TARGET_SSE4_1"
6080 "phminposuw\t{%1, %0|%0, %1}"
6081 [(set_attr "type" "sselog1")
6082 (set_attr "prefix_extra" "1")
6083 (set_attr "mode" "TI")])
6084
;; pmovsxbw, register source: sign-extends elements 0-7 of the V16QI
;; register operand 1 into a V8HI result.  Enabled under TARGET_SSE4_1.
6085 (define_insn "sse4_1_extendv8qiv8hi2"
6086 [(set (match_operand:V8HI 0 "register_operand" "=x")
6087 (sign_extend:V8HI
6088 (vec_select:V8QI
6089 (match_operand:V16QI 1 "register_operand" "x")
6090 (parallel [(const_int 0)
6091 (const_int 1)
6092 (const_int 2)
6093 (const_int 3)
6094 (const_int 4)
6095 (const_int 5)
6096 (const_int 6)
6097 (const_int 7)]))))]
6098 "TARGET_SSE4_1"
6099 "pmovsxbw\t{%1, %0|%0, %1}"
6100 [(set_attr "type" "ssemov")
6101 (set_attr "prefix_extra" "1")
6102 (set_attr "mode" "TI")])
6103
;; pmovsxbw, narrow source: operand 1 is a V8QI register or memory value,
;; written as vec_duplicate:V16QI so that elements 0-7 can be selected and
;; sign-extended into a V8HI result.
6104 (define_insn "*sse4_1_extendv8qiv8hi2"
6105 [(set (match_operand:V8HI 0 "register_operand" "=x")
6106 (sign_extend:V8HI
6107 (vec_select:V8QI
6108 (vec_duplicate:V16QI
6109 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6110 (parallel [(const_int 0)
6111 (const_int 1)
6112 (const_int 2)
6113 (const_int 3)
6114 (const_int 4)
6115 (const_int 5)
6116 (const_int 6)
6117 (const_int 7)]))))]
6118 "TARGET_SSE4_1"
6119 "pmovsxbw\t{%1, %0|%0, %1}"
6120 [(set_attr "type" "ssemov")
6121 (set_attr "prefix_extra" "1")
6122 (set_attr "mode" "TI")])
6123
;; pmovsxbd, register source: sign-extends elements 0-3 of the V16QI
;; register operand 1 into a V4SI result.  Enabled under TARGET_SSE4_1.
6124 (define_insn "sse4_1_extendv4qiv4si2"
6125 [(set (match_operand:V4SI 0 "register_operand" "=x")
6126 (sign_extend:V4SI
6127 (vec_select:V4QI
6128 (match_operand:V16QI 1 "register_operand" "x")
6129 (parallel [(const_int 0)
6130 (const_int 1)
6131 (const_int 2)
6132 (const_int 3)]))))]
6133 "TARGET_SSE4_1"
6134 "pmovsxbd\t{%1, %0|%0, %1}"
6135 [(set_attr "type" "ssemov")
6136 (set_attr "prefix_extra" "1")
6137 (set_attr "mode" "TI")])
6138
;; pmovsxbd, narrow source: operand 1 is a V4QI register or memory value,
;; written as vec_duplicate:V16QI so that elements 0-3 can be selected and
;; sign-extended into a V4SI result.
6139 (define_insn "*sse4_1_extendv4qiv4si2"
6140 [(set (match_operand:V4SI 0 "register_operand" "=x")
6141 (sign_extend:V4SI
6142 (vec_select:V4QI
6143 (vec_duplicate:V16QI
6144 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6145 (parallel [(const_int 0)
6146 (const_int 1)
6147 (const_int 2)
6148 (const_int 3)]))))]
6149 "TARGET_SSE4_1"
6150 "pmovsxbd\t{%1, %0|%0, %1}"
6151 [(set_attr "type" "ssemov")
6152 (set_attr "prefix_extra" "1")
6153 (set_attr "mode" "TI")])
6154
;; pmovsxbq, register source: sign-extends elements 0-1 of the V16QI
;; register operand 1 into a V2DI result.  Enabled under TARGET_SSE4_1.
6155 (define_insn "sse4_1_extendv2qiv2di2"
6156 [(set (match_operand:V2DI 0 "register_operand" "=x")
6157 (sign_extend:V2DI
6158 (vec_select:V2QI
6159 (match_operand:V16QI 1 "register_operand" "x")
6160 (parallel [(const_int 0)
6161 (const_int 1)]))))]
6162 "TARGET_SSE4_1"
6163 "pmovsxbq\t{%1, %0|%0, %1}"
6164 [(set_attr "type" "ssemov")
6165 (set_attr "prefix_extra" "1")
6166 (set_attr "mode" "TI")])
6167
;; pmovsxbq, narrow source: operand 1 is a V2QI register or memory value,
;; written as vec_duplicate:V16QI so that elements 0-1 can be selected and
;; sign-extended into a V2DI result.
6168 (define_insn "*sse4_1_extendv2qiv2di2"
6169 [(set (match_operand:V2DI 0 "register_operand" "=x")
6170 (sign_extend:V2DI
6171 (vec_select:V2QI
6172 (vec_duplicate:V16QI
6173 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6174 (parallel [(const_int 0)
6175 (const_int 1)]))))]
6176 "TARGET_SSE4_1"
6177 "pmovsxbq\t{%1, %0|%0, %1}"
6178 [(set_attr "type" "ssemov")
6179 (set_attr "prefix_extra" "1")
6180 (set_attr "mode" "TI")])
6181
;; pmovsxwd, register source: sign-extends elements 0-3 of the V8HI
;; register operand 1 into a V4SI result.  Enabled under TARGET_SSE4_1.
6182 (define_insn "sse4_1_extendv4hiv4si2"
6183 [(set (match_operand:V4SI 0 "register_operand" "=x")
6184 (sign_extend:V4SI
6185 (vec_select:V4HI
6186 (match_operand:V8HI 1 "register_operand" "x")
6187 (parallel [(const_int 0)
6188 (const_int 1)
6189 (const_int 2)
6190 (const_int 3)]))))]
6191 "TARGET_SSE4_1"
6192 "pmovsxwd\t{%1, %0|%0, %1}"
6193 [(set_attr "type" "ssemov")
6194 (set_attr "prefix_extra" "1")
6195 (set_attr "mode" "TI")])
6196
;; pmovsxwd, narrow source: operand 1 is a V4HI register or memory value,
;; written as vec_duplicate:V8HI so that elements 0-3 can be selected and
;; sign-extended into a V4SI result.
;; The source mode must be V4HI: four HI elements are selected, and the
;; zero-extending counterpart (*sse4_1_zero_extendv4hiv4si2) uses V4HI too.
6197 (define_insn "*sse4_1_extendv4hiv4si2"
6198 [(set (match_operand:V4SI 0 "register_operand" "=x")
6199 (sign_extend:V4SI
6200 (vec_select:V4HI
6201 (vec_duplicate:V8HI
6202 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6203 (parallel [(const_int 0)
6204 (const_int 1)
6205 (const_int 2)
6206 (const_int 3)]))))]
6207 "TARGET_SSE4_1"
6208 "pmovsxwd\t{%1, %0|%0, %1}"
6209 [(set_attr "type" "ssemov")
6210 (set_attr "prefix_extra" "1")
6211 (set_attr "mode" "TI")])
6212
;; pmovsxwq, register source: sign-extends elements 0-1 of the V8HI
;; register operand 1 into a V2DI result.  Enabled under TARGET_SSE4_1.
6213 (define_insn "sse4_1_extendv2hiv2di2"
6214 [(set (match_operand:V2DI 0 "register_operand" "=x")
6215 (sign_extend:V2DI
6216 (vec_select:V2HI
6217 (match_operand:V8HI 1 "register_operand" "x")
6218 (parallel [(const_int 0)
6219 (const_int 1)]))))]
6220 "TARGET_SSE4_1"
6221 "pmovsxwq\t{%1, %0|%0, %1}"
6222 [(set_attr "type" "ssemov")
6223 (set_attr "prefix_extra" "1")
6224 (set_attr "mode" "TI")])
6225
;; pmovsxwq, narrow source: operand 1 is a V2HI register or memory value,
;; written as vec_duplicate:V8HI so that elements 0-1 can be selected and
;; sign-extended into a V2DI result.
;; The source mode must be V2HI: vec_duplicate needs an operand narrower
;; than its own V8HI mode, and the zero-extending counterpart
;; (*sse4_1_zero_extendv2hiv2di2) uses V2HI too.
6226 (define_insn "*sse4_1_extendv2hiv2di2"
6227 [(set (match_operand:V2DI 0 "register_operand" "=x")
6228 (sign_extend:V2DI
6229 (vec_select:V2HI
6230 (vec_duplicate:V8HI
6231 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6232 (parallel [(const_int 0)
6233 (const_int 1)]))))]
6234 "TARGET_SSE4_1"
6235 "pmovsxwq\t{%1, %0|%0, %1}"
6236 [(set_attr "type" "ssemov")
6237 (set_attr "prefix_extra" "1")
6238 (set_attr "mode" "TI")])
6239
;; pmovsxdq, register source: sign-extends elements 0-1 of the V4SI
;; register operand 1 into a V2DI result.  Enabled under TARGET_SSE4_1.
6240 (define_insn "sse4_1_extendv2siv2di2"
6241 [(set (match_operand:V2DI 0 "register_operand" "=x")
6242 (sign_extend:V2DI
6243 (vec_select:V2SI
6244 (match_operand:V4SI 1 "register_operand" "x")
6245 (parallel [(const_int 0)
6246 (const_int 1)]))))]
6247 "TARGET_SSE4_1"
6248 "pmovsxdq\t{%1, %0|%0, %1}"
6249 [(set_attr "type" "ssemov")
6250 (set_attr "prefix_extra" "1")
6251 (set_attr "mode" "TI")])
6252
;; pmovsxdq, narrow source: operand 1 is a V2SI register or memory value,
;; written as vec_duplicate:V4SI so that elements 0-1 can be selected and
;; sign-extended into a V2DI result.
6253 (define_insn "*sse4_1_extendv2siv2di2"
6254 [(set (match_operand:V2DI 0 "register_operand" "=x")
6255 (sign_extend:V2DI
6256 (vec_select:V2SI
6257 (vec_duplicate:V4SI
6258 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6259 (parallel [(const_int 0)
6260 (const_int 1)]))))]
6261 "TARGET_SSE4_1"
6262 "pmovsxdq\t{%1, %0|%0, %1}"
6263 [(set_attr "type" "ssemov")
6264 (set_attr "prefix_extra" "1")
6265 (set_attr "mode" "TI")])
6266
;; pmovzxbw, register source: zero-extends elements 0-7 of the V16QI
;; register operand 1 into a V8HI result.  Enabled under TARGET_SSE4_1.
6267 (define_insn "sse4_1_zero_extendv8qiv8hi2"
6268 [(set (match_operand:V8HI 0 "register_operand" "=x")
6269 (zero_extend:V8HI
6270 (vec_select:V8QI
6271 (match_operand:V16QI 1 "register_operand" "x")
6272 (parallel [(const_int 0)
6273 (const_int 1)
6274 (const_int 2)
6275 (const_int 3)
6276 (const_int 4)
6277 (const_int 5)
6278 (const_int 6)
6279 (const_int 7)]))))]
6280 "TARGET_SSE4_1"
6281 "pmovzxbw\t{%1, %0|%0, %1}"
6282 [(set_attr "type" "ssemov")
6283 (set_attr "prefix_extra" "1")
6284 (set_attr "mode" "TI")])
6285
;; pmovzxbw, narrow source: operand 1 is a V8QI register or memory value,
;; written as vec_duplicate:V16QI so that elements 0-7 can be selected and
;; zero-extended into a V8HI result.
6286 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
6287 [(set (match_operand:V8HI 0 "register_operand" "=x")
6288 (zero_extend:V8HI
6289 (vec_select:V8QI
6290 (vec_duplicate:V16QI
6291 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
6292 (parallel [(const_int 0)
6293 (const_int 1)
6294 (const_int 2)
6295 (const_int 3)
6296 (const_int 4)
6297 (const_int 5)
6298 (const_int 6)
6299 (const_int 7)]))))]
6300 "TARGET_SSE4_1"
6301 "pmovzxbw\t{%1, %0|%0, %1}"
6302 [(set_attr "type" "ssemov")
6303 (set_attr "prefix_extra" "1")
6304 (set_attr "mode" "TI")])
6305
;; pmovzxbd, register source: zero-extends elements 0-3 of the V16QI
;; register operand 1 into a V4SI result.  Enabled under TARGET_SSE4_1.
6306 (define_insn "sse4_1_zero_extendv4qiv4si2"
6307 [(set (match_operand:V4SI 0 "register_operand" "=x")
6308 (zero_extend:V4SI
6309 (vec_select:V4QI
6310 (match_operand:V16QI 1 "register_operand" "x")
6311 (parallel [(const_int 0)
6312 (const_int 1)
6313 (const_int 2)
6314 (const_int 3)]))))]
6315 "TARGET_SSE4_1"
6316 "pmovzxbd\t{%1, %0|%0, %1}"
6317 [(set_attr "type" "ssemov")
6318 (set_attr "prefix_extra" "1")
6319 (set_attr "mode" "TI")])
6320
;; pmovzxbd, narrow source: operand 1 is a V4QI register or memory value,
;; written as vec_duplicate:V16QI so that elements 0-3 can be selected and
;; zero-extended into a V4SI result.
6321 (define_insn "*sse4_1_zero_extendv4qiv4si2"
6322 [(set (match_operand:V4SI 0 "register_operand" "=x")
6323 (zero_extend:V4SI
6324 (vec_select:V4QI
6325 (vec_duplicate:V16QI
6326 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
6327 (parallel [(const_int 0)
6328 (const_int 1)
6329 (const_int 2)
6330 (const_int 3)]))))]
6331 "TARGET_SSE4_1"
6332 "pmovzxbd\t{%1, %0|%0, %1}"
6333 [(set_attr "type" "ssemov")
6334 (set_attr "prefix_extra" "1")
6335 (set_attr "mode" "TI")])
6336
;; pmovzxbq, register source: zero-extends elements 0-1 of the V16QI
;; register operand 1 into a V2DI result.  Enabled under TARGET_SSE4_1.
6337 (define_insn "sse4_1_zero_extendv2qiv2di2"
6338 [(set (match_operand:V2DI 0 "register_operand" "=x")
6339 (zero_extend:V2DI
6340 (vec_select:V2QI
6341 (match_operand:V16QI 1 "register_operand" "x")
6342 (parallel [(const_int 0)
6343 (const_int 1)]))))]
6344 "TARGET_SSE4_1"
6345 "pmovzxbq\t{%1, %0|%0, %1}"
6346 [(set_attr "type" "ssemov")
6347 (set_attr "prefix_extra" "1")
6348 (set_attr "mode" "TI")])
6349
;; pmovzxbq, narrow source: operand 1 is a V2QI register or memory value,
;; written as vec_duplicate:V16QI so that elements 0-1 can be selected and
;; zero-extended into a V2DI result.
6350 (define_insn "*sse4_1_zero_extendv2qiv2di2"
6351 [(set (match_operand:V2DI 0 "register_operand" "=x")
6352 (zero_extend:V2DI
6353 (vec_select:V2QI
6354 (vec_duplicate:V16QI
6355 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
6356 (parallel [(const_int 0)
6357 (const_int 1)]))))]
6358 "TARGET_SSE4_1"
6359 "pmovzxbq\t{%1, %0|%0, %1}"
6360 [(set_attr "type" "ssemov")
6361 (set_attr "prefix_extra" "1")
6362 (set_attr "mode" "TI")])
6363
;; pmovzxwd, register source: zero-extends elements 0-3 of the V8HI
;; register operand 1 into a V4SI result.  Enabled under TARGET_SSE4_1.
6364 (define_insn "sse4_1_zero_extendv4hiv4si2"
6365 [(set (match_operand:V4SI 0 "register_operand" "=x")
6366 (zero_extend:V4SI
6367 (vec_select:V4HI
6368 (match_operand:V8HI 1 "register_operand" "x")
6369 (parallel [(const_int 0)
6370 (const_int 1)
6371 (const_int 2)
6372 (const_int 3)]))))]
6373 "TARGET_SSE4_1"
6374 "pmovzxwd\t{%1, %0|%0, %1}"
6375 [(set_attr "type" "ssemov")
6376 (set_attr "prefix_extra" "1")
6377 (set_attr "mode" "TI")])
6378
;; pmovzxwd, narrow source: operand 1 is a V4HI register or memory value,
;; written as vec_duplicate:V8HI so that elements 0-3 can be selected and
;; zero-extended into a V4SI result.
6379 (define_insn "*sse4_1_zero_extendv4hiv4si2"
6380 [(set (match_operand:V4SI 0 "register_operand" "=x")
6381 (zero_extend:V4SI
6382 (vec_select:V4HI
6383 (vec_duplicate:V8HI
6384 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
6385 (parallel [(const_int 0)
6386 (const_int 1)
6387 (const_int 2)
6388 (const_int 3)]))))]
6389 "TARGET_SSE4_1"
6390 "pmovzxwd\t{%1, %0|%0, %1}"
6391 [(set_attr "type" "ssemov")
6392 (set_attr "prefix_extra" "1")
6393 (set_attr "mode" "TI")])
6394
;; pmovzxwq, register source: zero-extends elements 0-1 of the V8HI
;; register operand 1 into a V2DI result.  Enabled under TARGET_SSE4_1.
6395 (define_insn "sse4_1_zero_extendv2hiv2di2"
6396 [(set (match_operand:V2DI 0 "register_operand" "=x")
6397 (zero_extend:V2DI
6398 (vec_select:V2HI
6399 (match_operand:V8HI 1 "register_operand" "x")
6400 (parallel [(const_int 0)
6401 (const_int 1)]))))]
6402 "TARGET_SSE4_1"
6403 "pmovzxwq\t{%1, %0|%0, %1}"
6404 [(set_attr "type" "ssemov")
6405 (set_attr "prefix_extra" "1")
6406 (set_attr "mode" "TI")])
6407
;; pmovzxwq, narrow source: operand 1 is a V2HI register or memory value,
;; written as vec_duplicate:V8HI so that elements 0-1 can be selected and
;; zero-extended into a V2DI result.
6408 (define_insn "*sse4_1_zero_extendv2hiv2di2"
6409 [(set (match_operand:V2DI 0 "register_operand" "=x")
6410 (zero_extend:V2DI
6411 (vec_select:V2HI
6412 (vec_duplicate:V8HI
6413 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
6414 (parallel [(const_int 0)
6415 (const_int 1)]))))]
6416 "TARGET_SSE4_1"
6417 "pmovzxwq\t{%1, %0|%0, %1}"
6418 [(set_attr "type" "ssemov")
6419 (set_attr "prefix_extra" "1")
6420 (set_attr "mode" "TI")])
6421
;; pmovzxdq, register source: zero-extends elements 0-1 of the V4SI
;; register operand 1 into a V2DI result.  Enabled under TARGET_SSE4_1.
6422 (define_insn "sse4_1_zero_extendv2siv2di2"
6423 [(set (match_operand:V2DI 0 "register_operand" "=x")
6424 (zero_extend:V2DI
6425 (vec_select:V2SI
6426 (match_operand:V4SI 1 "register_operand" "x")
6427 (parallel [(const_int 0)
6428 (const_int 1)]))))]
6429 "TARGET_SSE4_1"
6430 "pmovzxdq\t{%1, %0|%0, %1}"
6431 [(set_attr "type" "ssemov")
6432 (set_attr "prefix_extra" "1")
6433 (set_attr "mode" "TI")])
6434
;; pmovzxdq, narrow source: operand 1 is a V2SI register or memory value,
;; written as vec_duplicate:V4SI so that elements 0-1 can be selected and
;; zero-extended into a V2DI result.
6435 (define_insn "*sse4_1_zero_extendv2siv2di2"
6436 [(set (match_operand:V2DI 0 "register_operand" "=x")
6437 (zero_extend:V2DI
6438 (vec_select:V2SI
6439 (vec_duplicate:V4SI
6440 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
6441 (parallel [(const_int 0)
6442 (const_int 1)]))))]
6443 "TARGET_SSE4_1"
6444 "pmovzxdq\t{%1, %0|%0, %1}"
6445 [(set_attr "type" "ssemov")
6446 (set_attr "prefix_extra" "1")
6447 (set_attr "mode" "TI")])
6448
6449 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
6450 ;; But it is not really a compare instruction.
;; Sets FLAGS_REG (in CCmode) from an opaque UNSPEC_PTEST of the V2DI
;; register operand 0 and the V2DI register-or-memory operand 1.  No
;; general or vector register is written.
6451 (define_insn "sse4_1_ptest"
6452 [(set (reg:CC FLAGS_REG)
6453 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
6454 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
6455 UNSPEC_PTEST))]
6456 "TARGET_SSE4_1"
6457 "ptest\t{%1, %0|%0, %1}"
6458 [(set_attr "type" "ssecomi")
6459 (set_attr "prefix_extra" "1")
6460 (set_attr "mode" "TI")])
6461
;; roundpd: opaque UNSPEC_ROUND of the V2DF source operand 1 (register or
;; memory) and an immediate operand 2 restricted by const_0_to_15_operand.
;; The destination is not tied to the source.
6462 (define_insn "sse4_1_roundpd"
6463 [(set (match_operand:V2DF 0 "register_operand" "=x")
6464 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
6465 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6466 UNSPEC_ROUND))]
6467 "TARGET_SSE4_1"
6468 "roundpd\t{%2, %1, %0|%0, %1, %2}"
6469 [(set_attr "type" "ssecvt")
6470 (set_attr "prefix_extra" "1")
6471 (set_attr "mode" "V2DF")])
6472
;; roundps: opaque UNSPEC_ROUND of the V4SF source operand 1 (register or
;; memory) and an immediate operand 2 restricted by const_0_to_15_operand.
;; The destination is not tied to the source.
6473 (define_insn "sse4_1_roundps"
6474 [(set (match_operand:V4SF 0 "register_operand" "=x")
6475 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
6476 (match_operand:SI 2 "const_0_to_15_operand" "n")]
6477 UNSPEC_ROUND))]
6478 "TARGET_SSE4_1"
6479 "roundps\t{%2, %1, %0|%0, %1, %2}"
6480 [(set_attr "type" "ssecvt")
6481 (set_attr "prefix_extra" "1")
6482 (set_attr "mode" "V4SF")])
6483
;; roundsd: vec_merge with mask (const_int 1) of an UNSPEC_ROUND of the
;; V2DF register operand 2 (with immediate operand 3) and operand 1, which
;; is tied to the destination.  Operand 2 must be a register here.
6484 (define_insn "sse4_1_roundsd"
6485 [(set (match_operand:V2DF 0 "register_operand" "=x")
6486 (vec_merge:V2DF
6487 (unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x")
6488 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6489 UNSPEC_ROUND)
6490 (match_operand:V2DF 1 "register_operand" "0")
6491 (const_int 1)))]
6492 "TARGET_SSE4_1"
6493 "roundsd\t{%3, %2, %0|%0, %2, %3}"
6494 [(set_attr "type" "ssecvt")
6495 (set_attr "prefix_extra" "1")
6496 (set_attr "mode" "V2DF")])
6497
;; roundss: vec_merge with mask (const_int 1) of an UNSPEC_ROUND of the
;; V4SF register operand 2 (with immediate operand 3) and operand 1, which
;; is tied to the destination.  Operand 2 must be a register here.
6498 (define_insn "sse4_1_roundss"
6499 [(set (match_operand:V4SF 0 "register_operand" "=x")
6500 (vec_merge:V4SF
6501 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
6502 (match_operand:SI 3 "const_0_to_15_operand" "n")]
6503 UNSPEC_ROUND)
6504 (match_operand:V4SF 1 "register_operand" "0")
6505 (const_int 1)))]
6506 "TARGET_SSE4_1"
6507 "roundss\t{%3, %2, %0|%0, %2, %3}"
6508 [(set_attr "type" "ssecvt")
6509 (set_attr "prefix_extra" "1")
6510 (set_attr "mode" "V4SF")])
6511
6512 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6513 ;;
6514 ;; Intel SSE4.2 string/text processing instructions
6515 ;;
6516 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6517
;; Combined pcmpestr pattern.  A single insn sets three results from the
;; same UNSPEC_PCMPESTR inputs: an SI value in operand 0 (constraint "c"),
;; a V16QI value in operand 1 (constraint "Y0") and FLAGS_REG.
;; It only matches while reload has neither started nor completed, and is
;; always split ("&& 1"; the output template is "#").
;; The splitter looks at REG_UNUSED notes on curr_insn and emits:
;;   - sse4_2_pcmpestri when operand 0 is used;
;;   - sse4_2_pcmpestrm when operand 1 is used;
;;   - sse4_2_pcmpestr_cconly only when the flags are used and neither of
;;     the two insns above was emitted.
;; If none of the three results is used, nothing is emitted.
6518 (define_insn_and_split "sse4_2_pcmpestr"
6519 [(set (match_operand:SI 0 "register_operand" "=c,c")
6520 (unspec:SI
6521 [(match_operand:V16QI 2 "register_operand" "x,x")
6522 (match_operand:SI 3 "register_operand" "a,a")
6523 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
6524 (match_operand:SI 5 "register_operand" "d,d")
6525 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
6526 UNSPEC_PCMPESTR))
6527 (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
6528 (unspec:V16QI
6529 [(match_dup 2)
6530 (match_dup 3)
6531 (match_dup 4)
6532 (match_dup 5)
6533 (match_dup 6)]
6534 UNSPEC_PCMPESTR))
6535 (set (reg:CC FLAGS_REG)
6536 (unspec:CC
6537 [(match_dup 2)
6538 (match_dup 3)
6539 (match_dup 4)
6540 (match_dup 5)
6541 (match_dup 6)]
6542 UNSPEC_PCMPESTR))]
6543 "TARGET_SSE4_2
6544 && !(reload_completed || reload_in_progress)"
6545 "#"
6546 "&& 1"
6547 [(const_int 0)]
6548 {
6549 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6550 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6551 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6552
6553 if (ecx)
6554 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
6555 operands[3], operands[4],
6556 operands[5], operands[6]));
6557 if (xmm0)
6558 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
6559 operands[3], operands[4],
6560 operands[5], operands[6]));
6561 if (flags && !(ecx || xmm0))
6562 emit_insn (gen_sse4_2_pcmpestr_cconly (operands[2], operands[3],
6563 operands[4], operands[5],
6564 operands[6]));
6565 DONE;
6566 }
6567 [(set_attr "type" "sselog")
6568 (set_attr "prefix_data16" "1")
6569 (set_attr "prefix_extra" "1")
6570 (set_attr "memory" "none,load")
6571 (set_attr "mode" "TI")])
6572
;; PCMPESTRI: the SI result lands in operand 0 (constraint "c") and the
;; flags register is set from the same UNSPEC_PCMPESTR inputs.  Operands 2
;; and 4 are pinned by the "a" and "d" constraints and do not appear in
;; the output template.
(define_insn "sse4_2_pcmpestri"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:SI 2 "register_operand" "a,a")
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
           (match_operand:SI 4 "register_operand" "d,d")
           (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1)
           (match_dup 2)
           (match_dup 3)
           (match_dup 4)
           (match_dup 5)]
          UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2"
  "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
6597
;; PCMPESTRM: the V16QI result lands in operand 0 (constraint "Y0") and
;; the flags register is set from the same UNSPEC_PCMPESTR inputs.
;; Operands 2 and 4 are pinned by the "a" and "d" constraints and do not
;; appear in the output template.
(define_insn "sse4_2_pcmpestrm"
  [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:SI 2 "register_operand" "a,a")
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
           (match_operand:SI 4 "register_operand" "d,d")
           (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1)
           (match_dup 2)
           (match_dup 3)
           (match_dup 4)
           (match_dup 5)]
          UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2"
  "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
6622
;; Flags-only PCMPESTR: only the flags register is set, but the insn
;; still clobbers one result register.  Alternatives 0 and 1 clobber a
;; "Y0" V16QI scratch and output pcmpestrm; alternatives 2 and 3 clobber
;; a "c" SI scratch and output pcmpestri.  The scratch not needed by an
;; alternative has constraint "X".
(define_insn "sse4_2_pcmpestr_cconly"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
           (match_operand:SI 1 "register_operand" "a,a,a,a")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m,x,m")
           (match_operand:SI 3 "register_operand" "d,d,d,d")
           (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
          UNSPEC_PCMPESTR))
   (clobber (match_scratch:V16QI 5 "=Y0,Y0,X,X"))
   (clobber (match_scratch:SI 6 "= X, X,c,c"))]
  "TARGET_SSE4_2"
  "@
   pcmpestrm\t{%4, %2, %0|%0, %2, %4}
   pcmpestrm\t{%4, %2, %0|%0, %2, %4}
   pcmpestri\t{%4, %2, %0|%0, %2, %4}
   pcmpestri\t{%4, %2, %0|%0, %2, %4}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "memory" "none,load,none,load")
   (set_attr "mode" "TI")])
6645
;; Combined PCMPISTR pattern.  It describes all three results of the
;; compare at once: the SI result in operand 0 (constraint "c"), the
;; V16QI result in operand 1 (constraint "Y0") and the flags register.
;; It only matches before reload and is always split ("#") into
;; sse4_2_pcmpistri, sse4_2_pcmpistrm and/or sse4_2_pcmpistr_cconly,
;; depending on which results are not marked REG_UNUSED on the insn.
(define_insn_and_split "sse4_2_pcmpistr"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 2 "register_operand" "x,x")
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
           (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
        (unspec:V16QI
          [(match_dup 2)
           (match_dup 3)
           (match_dup 4)]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 2)
           (match_dup 3)
           (match_dup 4)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2
   && !(reload_completed || reload_in_progress)"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* A result is needed unless the insn carries a REG_UNUSED note for
     the register that holds it.  */
  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);

  if (ecx)
    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
                                     operands[3], operands[4]));
  if (xmm0)
    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
                                     operands[3], operands[4]));
  /* The two patterns above already set the flags; the flags-only form
     is needed just when neither register result is used.  */
  if (flags && !(ecx || xmm0))
    emit_insn (gen_sse4_2_pcmpistr_cconly (operands[2], operands[3],
                                           operands[4]));
  /* If no result is used at all, nothing has been emitted so far.  An
     empty sequence is treated as a failed split, which would leave
     this "#" insn in the stream even though its condition no longer
     holds once reload starts.  Emit a deleted-insn note so the split
     succeeds and the dead insn goes away.  */
  if (!(flags || ecx || xmm0))
    emit_note (NOTE_INSN_DELETED);

  DONE;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
6691
;; PCMPISTRI: the SI result lands in operand 0 (constraint "c") and the
;; flags register is set from the same UNSPEC_PCMPISTR inputs.
(define_insn "sse4_2_pcmpistri"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1)
           (match_dup 2)
           (match_dup 3)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
6712
;; PCMPISTRM: the V16QI result lands in operand 0 (constraint "Y0") and
;; the flags register is set from the same UNSPEC_PCMPISTR inputs.
(define_insn "sse4_2_pcmpistrm"
  [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1)
           (match_dup 2)
           (match_dup 3)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
6733
;; Flags-only PCMPISTR: only the flags register is set, but the insn
;; still clobbers one result register.  Alternatives 0 and 1 clobber a
;; "Y0" V16QI scratch and output pcmpistrm; alternatives 2 and 3 clobber
;; a "c" SI scratch and output pcmpistri.  The scratch not needed by an
;; alternative has constraint "X".
(define_insn "sse4_2_pcmpistr_cconly"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
           (match_operand:V16QI 1 "nonimmediate_operand" "x,m,x,m")
           (match_operand:SI 2 "const_0_to_255_operand" "n,n,n,n")]
          UNSPEC_PCMPISTR))
   (clobber (match_scratch:V16QI 3 "=Y0,Y0,X,X"))
   (clobber (match_scratch:SI 4 "= X, X,c,c"))]
  "TARGET_SSE4_2"
  "@
   pcmpistrm\t{%2, %1, %0|%0, %1, %2}
   pcmpistrm\t{%2, %1, %0|%0, %1, %2}
   pcmpistri\t{%2, %1, %0|%0, %1, %2}
   pcmpistri\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "memory" "none,load,none,load")
   (set_attr "mode" "TI")])