md.texi (Iterators): Renamed from Macros.
[gcc.git] / gcc / config / i386 / sse.md
1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007
3 ;; Free Software Foundation, Inc.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
;; 16 byte integral modes handled by SSE, minus TImode, which gets
;; special-cased for TARGET_64BIT.
(define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])

;; All 16-byte vector modes handled by SSE: the four integer modes
;; above plus the two floating-point modes.
(define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])

;; Mix-n-match.  The digits in each iterator name are the element
;; sizes, in bytes, of the integer vector modes it contains.
(define_mode_iterator SSEMODE12 [V16QI V8HI])
(define_mode_iterator SSEMODE24 [V8HI V4SI])
(define_mode_iterator SSEMODE14 [V16QI V4SI])
(define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
(define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])

;; Mapping from integer vector mode to mnemonic suffix
(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
38
39 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
40
41 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
42 ;;
43 ;; Move patterns
44 ;;
45 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46
47 ;; All of these patterns are enabled for SSE1 as well as SSE2.
48 ;; This is essential for maintaining stable calling conventions.
49
;; Move expander for every integer vector mode in SSEMODEI.  All of the
;; work is handed to ix86_expand_vector_move; the RTL template is never
;; emitted because the preparation code ends with DONE.
(define_expand "mov<mode>"
  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
        (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})
58
;; The move instruction for the SSEMODEI modes.  Three alternatives:
;;   0: load an SSE constant ("C") into a register,
;;   1: register <- register/memory,
;;   2: memory <- register.
;; At least one operand must be a register.
(define_insn "*mov<mode>_internal"
  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
        (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
  "TARGET_SSE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      /* Constant load; the helper picks the opcode.  */
      return standard_sse_constant_opcode (insn, operands[1]);

    case 1:
    case 2:
      /* The mode attribute below decides between the single-precision
         and the integer form of the aligned move.  */
      return (get_attr_mode (insn) == MODE_V4SF
              ? "movaps\t{%1, %0|%0, %1}"
              : "movdqa\t{%1, %0|%0, %1}");

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   ;; V4SF when optimizing for size, when SSE2 is unavailable, or for
   ;; the store alternative under TARGET_SSE_TYPELESS_STORES; TI otherwise.
   (set (attr "mode")
        (if_then_else
          (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
                    (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
               (and (eq_attr "alternative" "2")
                    (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
                        (const_int 0))))
          (const_string "V4SF")
          (const_string "TI")))])
90
91 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
92 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
93 ;; from memory, we'd prefer to load the memory directly into the %xmm
94 ;; register. To facilitate this happy circumstance, this pattern won't
95 ;; split until after register allocation. If the 64-bit value didn't
96 ;; come from memory, this is the best we can do. This is much better
97 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
98 ;; from there.
99
;; Operand 0: V4SI destination register.
;; Operand 1: DImode source, either an integer register pair or memory.
;; Operand 2: V4SI scratch, only needed by the register alternative.
;; The insn is emitted as "#" and split once reload has completed.
(define_insn_and_split "movdi_to_sse"
  [(parallel
    [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
          (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
     (clobber (match_scratch:V4SI 2 "=&x,X"))])]
  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx src = operands[1];

  if (register_operand (src, DImode))
    {
      /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
         Assemble the 64-bit DImode value in an xmm register: load the
         SImode piece at byte offset 0 into the destination, the piece at
         byte offset 4 into the scratch, then combine the two.  */
      rtx zero = CONST0_RTX (V4SImode);
      rtx lo = gen_rtx_SUBREG (SImode, src, 0);
      rtx hi = gen_rtx_SUBREG (SImode, src, 4);

      emit_insn (gen_sse2_loadld (operands[0], zero, lo));
      emit_insn (gen_sse2_loadld (operands[2], zero, hi));
      emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
    }
  else if (memory_operand (src, DImode))
    {
      /* Memory source: one concat of the DImode value with zero, written
         through the V2DI view of the destination.  */
      rtx dest = gen_lowpart (V2DImode, operands[0]);

      emit_insn (gen_vec_concatv2di (dest, src, const0_rtx));
    }
  else
    gcc_unreachable ();
})
125
;; V4SF move expander.  Delegates entirely to ix86_expand_vector_move;
;; the template is not emitted (DONE).
(define_expand "movv4sf"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (match_operand:V4SF 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (V4SFmode, operands);
  DONE;
})
134
;; V4SF move instruction.  Alternative 0 loads an SSE constant; the
;; register/memory alternatives 1 and 2 always use movaps.  At least
;; one operand must be a register.
(define_insn "*movv4sf_internal"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
        (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
  "TARGET_SSE
   && (register_operand (operands[0], V4SFmode)
       || register_operand (operands[1], V4SFmode))"
{
  switch (which_alternative)
    {
    case 0:
      /* Constant load; the helper picks the opcode.  */
      return standard_sse_constant_opcode (insn, operands[1]);

    case 1:
    case 2:
      return "movaps\t{%1, %0|%0, %1}";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set_attr "mode" "V4SF")])
155
;; After reload, rewrite a V4SF register load from a
;; zero_extended_scalar_load_operand as a vec_merge of the duplicated
;; SFmode scalar (element 0, mask 1) with an all-zero vector.
(define_split
  [(set (match_operand:V4SF 0 "register_operand" "")
        (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0)
        (vec_merge:V4SF
          (vec_duplicate:V4SF (match_dup 1))
          (match_dup 2)
          (const_int 1)))]
{
  /* Operand 1 becomes the SFmode piece at byte offset 0 of the source;
     operand 2 is the zero vector supplying the other elements.  */
  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
  operands[2] = CONST0_RTX (V4SFmode);
})
169
;; V2DF move expander.  Delegates entirely to ix86_expand_vector_move;
;; the template is not emitted (DONE).
(define_expand "movv2df"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
        (match_operand:V2DF 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (V2DFmode, operands);
  DONE;
})
178
;; V2DF move instruction.  Alternative 0 loads an SSE constant; the
;; register/memory alternatives emit movaps or movapd depending on the
;; computed "mode" attribute.  At least one operand must be a register.
(define_insn "*movv2df_internal"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
        (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
  "TARGET_SSE
   && (register_operand (operands[0], V2DFmode)
       || register_operand (operands[1], V2DFmode))"
{
  switch (which_alternative)
    {
    case 0:
      /* Constant load; the helper picks the opcode.  */
      return standard_sse_constant_opcode (insn, operands[1]);

    case 1:
    case 2:
      return (get_attr_mode (insn) == MODE_V4SF
              ? "movaps\t{%1, %0|%0, %1}"
              : "movapd\t{%1, %0|%0, %1}");

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   ;; V4SF when optimizing for size, when SSE2 is unavailable, or for
   ;; the store alternative under TARGET_SSE_TYPELESS_STORES; V2DF otherwise.
   (set (attr "mode")
        (if_then_else
          (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
                    (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
               (and (eq_attr "alternative" "2")
                    (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
                        (const_int 0))))
          (const_string "V4SF")
          (const_string "V2DF")))])
210
;; After reload, rewrite a V2DF register load from a
;; zero_extended_scalar_load_operand as a vec_concat of the DFmode
;; scalar with a DFmode zero.
(define_split
  [(set (match_operand:V2DF 0 "register_operand" "")
        (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0)
        (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
{
  /* Operand 1 becomes the DFmode piece at byte offset 0 of the source;
     operand 2 is the zero placed in the other half.  */
  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
  operands[2] = CONST0_RTX (DFmode);
})
220
;; Push expander for every SSEMODE vector mode; the single register
;; operand is handed to ix86_expand_push.
(define_expand "push<mode>1"
  [(match_operand:SSEMODE 0 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_push (<MODE>mode, operands[0]);
  DONE;
})
228
;; Misaligned move expander for every SSEMODE vector mode.  All of the
;; work is done by ix86_expand_vector_move_misalign.
(define_expand "movmisalign<mode>"
  [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
        (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move_misalign (<MODE>mode, operands);
  DONE;
})
237
;; movups on V4SF, wrapped in UNSPEC_MOVU so it stays distinct from the
;; ordinary V4SF move.  Either operand may be memory, but not both.
;;
;; The "mode" attribute is V4SF, matching the operand mode and the other
;; V4SF patterns in this file (e.g. *movv4sf_internal, sse_movntv4sf).
;; It used to read V2DF, which is the mode sse2_movupd uses.
(define_insn "sse_movups"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
        (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
                     UNSPEC_MOVU))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movups\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF")])
246
;; movupd on V2DF, wrapped in UNSPEC_MOVU.  Either operand may be
;; memory, but not both.
(define_insn "sse2_movupd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
        (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
                     UNSPEC_MOVU))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movupd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2DF")])
255
;; movdqu on V16QI, wrapped in UNSPEC_MOVU.  Either operand may be
;; memory, but not both.
(define_insn "sse2_movdqu"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
        (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
                      UNSPEC_MOVU))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movdqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
265
;; movntps: store of a V4SF register to memory, kept opaque as
;; UNSPEC_MOVNT.
(define_insn "sse_movntv4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "=m")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE"
  "movntps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF")])
274
;; movntpd: store of a V2DF register to memory, kept opaque as
;; UNSPEC_MOVNT.
;; NOTE(review): "type" is ssecvt here while sse_movntv4sf uses ssemov;
;; confirm whether the difference is intended.
(define_insn "sse2_movntv2df"
  [(set (match_operand:V2DF 0 "memory_operand" "=m")
        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movntpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])
283
;; movntdq: store of a V2DI register to memory, kept opaque as
;; UNSPEC_MOVNT.
(define_insn "sse2_movntv2di"
  [(set (match_operand:V2DI 0 "memory_operand" "=m")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movntdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
293
;; movnti: store of an SImode general register ("r") to memory, kept
;; opaque as UNSPEC_MOVNT.
;;
;; The "mode" attribute is SI to match the operands.  It used to read
;; V2DF, a vector floating-point mode that does not describe this
;; integer store.
;; NOTE(review): "type" is left as ssecvt, as on the neighbouring movnt
;; patterns; confirm no scheduling description keys on the old
;; ssecvt/V2DF pairing for this insn.
(define_insn "sse2_movntsi"
  [(set (match_operand:SI 0 "memory_operand" "=m")
        (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movnti\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "SI")])
302
;; lddqu: V16QI load from memory into a register, kept opaque as
;; UNSPEC_LDDQU.
(define_insn "sse3_lddqu"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
                      UNSPEC_LDDQU))]
  "TARGET_SSE3"
  "lddqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "TI")])
312
313 ; Expand patterns for non-temporal stores. At the moment, only those
314 ; that directly map to insns are defined; it would be possible to
315 ; define patterns for other modes that would expand to several insns.
316
;; Non-temporal store expander for V4SF.  The generated RTL has the same
;; shape as the sse_movntv4sf insn pattern.  Constraint strings are left
;; empty, as in storentdf/storentsf and the other expanders in this
;; file; only the predicates matter here.
(define_expand "storentv4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE"
  "")
323
;; Non-temporal store expander for V2DF.  The generated RTL has the same
;; shape as the sse2_movntv2df insn pattern.  Constraint strings are left
;; empty, as in storentdf/storentsf and the other expanders in this
;; file; only the predicates matter here.
(define_expand "storentv2df"
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "")
330
;; Non-temporal store expander for V2DI.  The generated RTL has the same
;; shape as the sse2_movntv2di insn pattern.  Constraint strings are left
;; empty, as in storentdf/storentsf and the other expanders in this
;; file; only the predicates matter here.
(define_expand "storentv2di"
  [(set (match_operand:V2DI 0 "memory_operand" "")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "")
337
;; Non-temporal store expander for SImode.  The generated RTL has the
;; same shape as the sse2_movntsi insn pattern.  Constraint strings are
;; left empty, as in storentdf/storentsf and the other expanders in this
;; file; only the predicates matter here.
(define_expand "storentsi"
  [(set (match_operand:SI 0 "memory_operand" "")
        (unspec:SI [(match_operand:SI 1 "register_operand" "")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "")
344
;; Non-temporal store expander for scalar DFmode, enabled for SSE4A.
;; The matching insn pattern is not in this part of the file.
(define_expand "storentdf"
  [(set (match_operand:DF 0 "memory_operand" "")
        (unspec:DF [(match_operand:DF 1 "register_operand" "")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "")
351
;; Non-temporal store expander for scalar SFmode, enabled for SSE4A.
;; The matching insn pattern is not in this part of the file.
(define_expand "storentsf"
  [(set (match_operand:SF 0 "memory_operand" "")
        (unspec:SF [(match_operand:SF 1 "register_operand" "")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "")
358
359 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
360 ;;
361 ;; Parallel single-precision floating point arithmetic
362 ;;
363 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
364
;; V4SF negation.  Expansion is delegated to
;; ix86_expand_fp_absneg_operator; the template is not emitted (DONE).
(define_expand "negv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands);
  DONE;
})
370
;; V4SF absolute value.  Expansion is delegated to
;; ix86_expand_fp_absneg_operator; the template is not emitted (DONE).
(define_expand "absv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands);
  DONE;
})
376
;; V4SF addition.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy, then the template is emitted.
(define_expand "addv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);
})
383
;; addps.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be a register or memory.
(define_insn "*addv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
  "addps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])
392
;; addss.  The vec_merge mask of 1 takes element 0 from the sum and the
;; remaining elements unchanged from operand 1.
(define_insn "sse_vmaddv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
  "addss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "SF")])
404
;; V4SF subtraction.  Operand 1 must already be a register.  The
;; operands are adjusted by ix86_fixup_binary_operands_no_copy, then the
;; template is emitted.
(define_expand "subv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
                    (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);
})
411
;; subps.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.
(define_insn "*subv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "subps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])
420
;; subss.  The vec_merge mask of 1 takes element 0 from the difference
;; and the remaining elements unchanged from operand 1.
(define_insn "sse_vmsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "subss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "SF")])
432
;; V4SF multiplication.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy, then the template is emitted.
(define_expand "mulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);
})
439
;; mulps.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be a register or memory.
(define_insn "*mulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
  "mulps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "V4SF")])
448
;; mulss.  The vec_merge mask of 1 takes element 0 from the product and
;; the remaining elements unchanged from operand 1.
(define_insn "sse_vmmulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
  "mulss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "SF")])
460
;; V4SF division.  After the usual operand fixup, the expander either
;; emits the software division sequence from ix86_emit_swdivsf (when
;; the flag combination below holds) or falls through and emits the
;; plain div template.
(define_expand "divv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (div:V4SF (match_operand:V4SF 1 "register_operand" "")
                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);

  /* The software sequence is only used with SSE math, TARGET_RECIP,
     not optimizing for size, and finite-only, non-trapping, unsafe
     math optimizations all enabled.  */
  if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swdivsf (operands[0], operands[1], operands[2], V4SFmode);
      DONE;
    }
})
478
;; divps.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.
(define_insn "*divv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "divps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "V4SF")])
487
;; divss.  The vec_merge mask of 1 takes element 0 from the quotient and
;; the remaining elements unchanged from operand 1.
(define_insn "sse_vmdivv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "divss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "SF")])
499
;; rcpps, represented as UNSPEC_RCP of the single source operand.
(define_insn "sse_rcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
          UNSPEC_RCP))]
  "TARGET_SSE"
  "rcpps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
508
;; rcpss.  Element 0 comes from UNSPEC_RCP of operand 1; the remaining
;; elements come from operand 2, which is tied to the destination.
(define_insn "sse_vmrcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_RCP)
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "rcpss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])
520
;; rsqrtps, represented as UNSPEC_RSQRT of the single source operand.
;; This is the insn matched by the RTL that the sse_rsqrtv4sf2 expander
;; emits when it does not take its software path.
(define_insn "*sse_rsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
          UNSPEC_RSQRT))]
  "TARGET_SSE"
  "rsqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
529
;; Expander for UNSPEC_RSQRT on V4SF.  When the flag combination below
;; holds, ix86_emit_swsqrtsf is called with a final argument of 1 and
;; the template is skipped; otherwise the unspec template is emitted.
(define_expand "sse_rsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (unspec:V4SF
          [(match_operand:V4SF 1 "nonimmediate_operand" "")]
          UNSPEC_RSQRT))]
  "TARGET_SSE"
{
  /* Same gating as the software paths of divv4sf3 and sqrtv4sf2.  */
  if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
      DONE;
    }
})
544
;; rsqrtss.  Element 0 comes from UNSPEC_RSQRT of operand 1; the
;; remaining elements come from operand 2, which is tied to the
;; destination.
(define_insn "sse_vmrsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_RSQRT)
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "rsqrtss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])
556
;; sqrtps.  This is the insn matched by the RTL that the sqrtv4sf2
;; expander emits when it does not take its software path.
(define_insn "*sqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "sqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
564
;; V4SF square root expander.  When the flag combination below holds,
;; ix86_emit_swsqrtsf is called with a final argument of 0 and the
;; template is skipped; otherwise the sqrt template is emitted.
;;
;; Operand 0 carries an empty constraint string like every other
;; expander in this file; it previously held a lone "=", which names no
;; register class.
(define_expand "sqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  /* Same gating as the software paths of divv4sf3 and sse_rsqrtv4sf2.  */
  if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
      DONE;
    }
})
578
;; sqrtss.  Element 0 is the square root of operand 1's element 0; the
;; remaining elements come from operand 2, which is tied to the
;; destination.
(define_insn "sse_vmsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "sqrtss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])
589
590 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
591 ;; isn't really correct, as those rtl operators aren't defined when
592 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
593
;; V4SF signed maximum.  Unless -ffinite-math-only is in effect,
;; operand 1 is forced into a register before the generic operand
;; fixup, matching the register_operand predicate of *smaxv4sf3.
(define_expand "smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (V4SFmode, operands[1]);

  ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
})
604
;; maxps, finite-math variant: only enabled under flag_finite_math_only,
;; with operand 1 marked commutative ("%0").
(define_insn "*smaxv4sf3_finite"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && flag_finite_math_only
   && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
614
;; maxps, general variant: operand 1 must be a register tied to the
;; destination and is not marked commutative.
(define_insn "*smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
623
;; maxss.  The vec_merge mask of 1 takes element 0 from the smax result
;; and the remaining elements unchanged from operand 1.
(define_insn "sse_vmsmaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "maxss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])
635
;; V4SF signed minimum.  Unless -ffinite-math-only is in effect,
;; operand 1 is forced into a register before the generic operand
;; fixup, matching the register_operand predicate of *sminv4sf3.
(define_expand "sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (V4SFmode, operands[1]);

  ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
})
646
;; minps, finite-math variant: only enabled under flag_finite_math_only,
;; with operand 1 marked commutative ("%0").
(define_insn "*sminv4sf3_finite"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && flag_finite_math_only
   && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
656
;; minps, general variant: operand 1 must be a register tied to the
;; destination and is not marked commutative.
(define_insn "*sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
665
;; minss.  The vec_merge mask of 1 takes element 0 from the smin result
;; and the remaining elements unchanged from operand 1.
(define_insn "sse_vmsminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "minss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])
677
678 ;; These versions of the min/max patterns implement exactly the operations
679 ;; min = (op1 < op2 ? op1 : op2)
680 ;; max = (!(op1 < op2) ? op1 : op2)
681 ;; Their operands are not commutative, and thus they may be used in the
682 ;; presence of -0.0 and NaN.
683
;; minps expressed as UNSPEC_IEEE_MIN, so the operand order is fixed:
;; operand 1 is tied to the destination and there is no "%" modifier.
(define_insn "*ieee_sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "register_operand" "0")
           (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
          UNSPEC_IEEE_MIN))]
  "TARGET_SSE"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])
693
;; maxps expressed as UNSPEC_IEEE_MAX, so the operand order is fixed:
;; operand 1 is tied to the destination and there is no "%" modifier.
(define_insn "*ieee_smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "register_operand" "0")
           (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
          UNSPEC_IEEE_MAX))]
  "TARGET_SSE"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])
703
;; minpd expressed as UNSPEC_IEEE_MIN, so the operand order is fixed:
;; operand 1 is tied to the destination and there is no "%" modifier.
(define_insn "*ieee_sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (unspec:V2DF
          [(match_operand:V2DF 1 "register_operand" "0")
           (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
          UNSPEC_IEEE_MIN))]
  "TARGET_SSE2"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
713
;; maxpd expressed as UNSPEC_IEEE_MAX, so the operand order is fixed:
;; operand 1 is tied to the destination and there is no "%" modifier.
(define_insn "*ieee_smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (unspec:V2DF
          [(match_operand:V2DF 1 "register_operand" "0")
           (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
          UNSPEC_IEEE_MAX))]
  "TARGET_SSE2"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
723
;; addsubps: subtract in elements 0 and 2, add in elements 1 and 3.
;;
;; In a vec_merge a set mask bit selects the element from the first
;; vector (here the PLUS result) and a clear bit selects it from the
;; second (the MINUS result).  Bit 0 is element 0, as in the mask-1
;; scalar patterns in this file.  The additions land in elements 1 and
;; 3, so the mask is binary 1010 = 10.  The mask previously read 5,
;; which described the opposite lane assignment from the one the
;; instruction performs.
(define_insn "sse3_addsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (plus:V4SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (minus:V4SF (match_dup 1) (match_dup 2))
          (const_int 10)))]
  "TARGET_SSE3"
  "addsubps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "V4SF")])
737
;; haddps.  Result elements, low to high:
;;   op1[0] + op1[1], op1[2] + op1[3], op2[0] + op2[1], op2[2] + op2[3].
(define_insn "sse3_haddv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          ;; Low half: pairwise sums of operand 1.
          (vec_concat:V2SF
            (plus:SF
              (vec_select:SF
                (match_operand:V4SF 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
            (plus:SF
              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
          ;; High half: pairwise sums of operand 2.
          (vec_concat:V2SF
            (plus:SF
              (vec_select:SF
                (match_operand:V4SF 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
            (plus:SF
              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSE3"
  "haddps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "V4SF")])
764
;; hsubps.  Result elements, low to high:
;;   op1[0] - op1[1], op1[2] - op1[3], op2[0] - op2[1], op2[2] - op2[3].
(define_insn "sse3_hsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          ;; Low half: pairwise differences of operand 1.
          (vec_concat:V2SF
            (minus:SF
              (vec_select:SF
                (match_operand:V4SF 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
            (minus:SF
              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
          ;; High half: pairwise differences of operand 2.
          (vec_concat:V2SF
            (minus:SF
              (vec_select:SF
                (match_operand:V4SF 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
            (minus:SF
              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSE3"
  "hsubps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "V4SF")])
791
;; Sum reduction of a V4SF vector.  Without SSE3 the generic helper
;; ix86_expand_reduc_v4sf is used with the addv4sf3 generator; with
;; SSE3 two back-to-back horizontal adds are emitted instead.
(define_expand "reduc_splus_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  if (!TARGET_SSE3)
    ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
  else
    {
      /* First hadd of the input with itself into a fresh pseudo, then
         hadd of that intermediate with itself into the destination.  */
      rtx partial = gen_reg_rtx (V4SFmode);

      emit_insn (gen_sse3_haddv4sf3 (partial, operands[1], operands[1]));
      emit_insn (gen_sse3_haddv4sf3 (operands[0], partial, partial));
    }
  DONE;
})
807
;; Maximum reduction of a V4SF vector, built by ix86_expand_reduc_v4sf
;; from the smaxv4sf3 generator.
(define_expand "reduc_smax_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
  DONE;
})
816
;; Minimum reduction of a V4SF vector, built by ix86_expand_reduc_v4sf
;; from the sminv4sf3 generator.
(define_expand "reduc_smin_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
  DONE;
})
825
826 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
827 ;;
828 ;; Parallel single-precision floating point comparisons
829 ;;
830 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
831
;; Packed compare (cmp<cond>ps).  Operand 3 is the comparison operator;
;; the %D3 output modifier supplies the condition part of the mnemonic.
(define_insn "sse_maskcmpv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (match_operator:V4SF 3 "sse_comparison_operator"
          [(match_operand:V4SF 1 "register_operand" "0")
           (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
  "TARGET_SSE"
  "cmp%D3ps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "V4SF")])
841
;; Scalar compare (cmp<cond>ss) on SFmode operands.  Operand 3 is the
;; comparison operator; %D3 supplies the condition part of the mnemonic.
(define_insn "sse_maskcmpsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (match_operator:SF 3 "sse_comparison_operator"
          [(match_operand:SF 1 "register_operand" "0")
           (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
  "TARGET_SSE"
  "cmp%D3ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "SF")])
851
;; Scalar compare (cmp<cond>ss) in V4SF form.  The vec_merge mask of 1
;; takes element 0 from the comparison and the remaining elements
;; unchanged from operand 1.  Both inputs must be registers.
(define_insn "sse_vmmaskcmpv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (match_operator:V4SF 3 "sse_comparison_operator"
            [(match_operand:V4SF 1 "register_operand" "0")
             (match_operand:V4SF 2 "register_operand" "x")])
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "cmp%D3ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "SF")])
864
;; comiss: compare element 0 of the two V4SF operands and set the flags
;; register in CCFP mode.
(define_insn "sse_comi"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (vec_select:SF
            (match_operand:V4SF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "comiss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "SF")])
878
;; ucomiss: compare element 0 of the two V4SF operands and set the flags
;; register in CCFPU mode.
(define_insn "sse_ucomi"
  [(set (reg:CCFPU FLAGS_REG)
        (compare:CCFPU
          (vec_select:SF
            (match_operand:V4SF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "ucomiss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "SF")])
892
;; Vector conditional select on V4SF:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2.
;; ix86_expand_fp_vcond does the work; the expander FAILs when that
;; helper returns zero.
(define_expand "vcondv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (if_then_else:V4SF
          (match_operator 3 ""
            [(match_operand:V4SF 4 "nonimmediate_operand" "")
             (match_operand:V4SF 5 "nonimmediate_operand" "")])
          (match_operand:V4SF 1 "general_operand" "")
          (match_operand:V4SF 2 "general_operand" "")))]
  "TARGET_SSE"
{
  if (!ix86_expand_fp_vcond (operands))
    FAIL;
  DONE;
})
908
909 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
910 ;;
911 ;; Parallel single-precision floating point logical operations
912 ;;
913 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
914
;; Bitwise AND on V4SF.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy, then the template is emitted.
(define_expand "andv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);
})
921
;; andps.  Operand 1 is tied to the destination and marked commutative
;; ("%0"); operand 2 may be a register or memory.
(define_insn "*andv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
  "andps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
930
;; andnps: (NOT operand 1) AND operand 2.  The complemented operand is
;; the one tied to the destination.
(define_insn "sse_nandv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "andnps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
939
;; Expander for V4SF bitwise inclusive OR.  The operands are first handed
;; to ix86_fixup_binary_operands_no_copy, then the pattern is emitted.
(define_expand "iorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (ior:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);
})
946
;; V4SF bitwise inclusive OR, emitted as orps.  Operand 1 is commutative
;; and tied to the destination register.
(define_insn "*iorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (ior:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "%0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
  "orps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
955
;; Expander for V4SF bitwise exclusive OR.  The operands are first handed
;; to ix86_fixup_binary_operands_no_copy, then the pattern is emitted.
(define_expand "xorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (xor:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);
})
962
;; V4SF bitwise exclusive OR, emitted as xorps.  Operand 1 is commutative
;; and tied to the destination register.
(define_insn "*xorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (xor:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "%0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
  "xorps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
971
;; Also define scalar versions.  These are used for abs, neg, and
;; conditional move.  Using subregs into vector modes causes register
;; allocation problems.  These patterns do not allow memory operands
;; because the native instructions read the full 128 bits.
976
;; Scalar SF bitwise AND, register operands only.  It is emitted as the
;; packed andps, hence the V4SF "mode" attribute.
(define_insn "*andsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (and:SF
          (match_operand:SF 1 "register_operand" "0")
          (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "andps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
985
;; Scalar SF and-not: (~operand 1) & operand 2, register operands only.
;; It is emitted as the packed andnps, hence the V4SF "mode" attribute.
(define_insn "*nandsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (and:SF
          (not:SF (match_operand:SF 1 "register_operand" "0"))
          (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "andnps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
994
;; Scalar SF bitwise inclusive OR, register operands only.  It is emitted
;; as the packed orps, hence the V4SF "mode" attribute.
(define_insn "*iorsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (ior:SF
          (match_operand:SF 1 "register_operand" "0")
          (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "orps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
1003
;; Scalar SF bitwise exclusive OR, register operands only.  It is emitted
;; as the packed xorps, hence the V4SF "mode" attribute.
(define_insn "*xorsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (xor:SF
          (match_operand:SF 1 "register_operand" "0")
          (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "xorps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
1012
1013 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1014 ;;
1015 ;; Parallel single-precision floating point conversion operations
1016 ;;
1017 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1018
;; Convert a V2SI (MMX register or memory) to floating point and merge it
;; into operand 1 under merge mask 3; operand 1 is tied to the destination.
(define_insn "sse_cvtpi2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:V2SF
              (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
          (match_operand:V4SF 1 "register_operand" "0")
          (const_int 3)))]
  "TARGET_SSE"
  "cvtpi2ps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])
1030
;; Float-to-integer conversion, modeled as UNSPEC_FIX_NOTRUNC on the V4SF
;; source; elements 0 and 1 of the result land in an MMX register.
(define_insn "sse_cvtps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
          (unspec:V4SI
            [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
            UNSPEC_FIX_NOTRUNC)
          (parallel [(const_int 0)
                     (const_int 1)])))]
  "TARGET_SSE"
  "cvtps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "DI")])
1042
;; Truncating float-to-integer conversion (fix) of the V4SF source;
;; elements 0 and 1 of the result land in an MMX register.
(define_insn "sse_cvttps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
          (fix:V4SI
            (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0)
                     (const_int 1)])))]
  "TARGET_SSE"
  "cvttps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "SF")])
1053
;; Convert a 32-bit integer (register or memory alternative) to SF and
;; merge it into element 0 of operand 1 (merge mask 1).
(define_insn "sse_cvtsi2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF
              (match_operand:SI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE"
  "cvtsi2ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "amdfam10_decode" "vector,double")
   (set_attr "mode" "SF")])
1067
;; 64-bit variant of sse_cvtsi2ss: convert a DImode integer to SF and
;; merge it into element 0 of operand 1.  Only enabled for TARGET_64BIT.
(define_insn "sse_cvtsi2ssq"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF
              (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE && TARGET_64BIT"
  "cvtsi2ssq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "amdfam10_decode" "vector,double")
   (set_attr "mode" "SF")])
1081
;; Convert element 0 of a V4SF to a 32-bit integer; the conversion is
;; modeled as UNSPEC_FIX_NOTRUNC rather than fix.
(define_insn "sse_cvtss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(vec_select:SF
             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE"
  "cvtss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "SI")])
1095
;; Same conversion as sse_cvtss2si, but taking a plain SF operand
;; instead of element 0 of a V4SF.
(define_insn "sse_cvtss2si_2"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE"
  "cvtss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "SI")])
1107
;; Convert element 0 of a V4SF to a 64-bit integer (UNSPEC_FIX_NOTRUNC).
;; Only enabled for TARGET_64BIT.
(define_insn "sse_cvtss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(vec_select:SF
             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE && TARGET_64BIT"
  "cvtss2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "DI")])
1121
;; Same conversion as sse_cvtss2siq, but taking a plain SF operand
;; instead of element 0 of a V4SF.  Only enabled for TARGET_64BIT.
(define_insn "sse_cvtss2siq_2"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE && TARGET_64BIT"
  "cvtss2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "DI")])
1133
;; Truncating conversion (fix) of element 0 of a V4SF to a 32-bit integer.
(define_insn "sse_cvttss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (fix:SI
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "cvttss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "SI")])
1147
;; Truncating conversion (fix) of element 0 of a V4SF to a 64-bit integer.
;; Only enabled for TARGET_64BIT.
(define_insn "sse_cvttss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (fix:DI
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE && TARGET_64BIT"
  "cvttss2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "DI")])
1161
;; Element-wise integer-to-float conversion, V4SI -> V4SF (SSE2).
(define_insn "sse2_cvtdq2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (float:V4SF
          (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvtdq2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])
1169
;; Element-wise float-to-integer conversion, V4SF -> V4SI (SSE2),
;; modeled as UNSPEC_FIX_NOTRUNC rather than fix.
(define_insn "sse2_cvtps2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI
          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
1179
;; Element-wise truncating float-to-integer conversion (fix),
;; V4SF -> V4SI (SSE2).
(define_insn "sse2_cvttps2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (fix:V4SI
          (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvttps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "TI")])
1188
1189 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1190 ;;
1191 ;; Parallel single-precision floating point element swizzling
1192 ;;
1193 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1194
;; Result = { op2[2], op2[3], op1[2], op1[3] } (indices 6 7 2 3 of the
;; op1:op2 concatenation).  Three alternatives: register source, offsettable
;; memory source (reads its high half via %H2), and memory destination.
(define_insn "sse_movhlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,m")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
            (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
          (parallel [(const_int 6) (const_int 7)
                     (const_int 2) (const_int 3)])))]
  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhlps\t{%2, %0|%0, %2}
   movlps\t{%H2, %0|%0, %H2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF,V2SF,V2SF")])
1212
;; Result = { op1[0], op1[1], op2[0], op2[1] } (indices 0 1 4 5 of the
;; op1:op2 concatenation).  Three alternatives: register source, memory
;; source, and offsettable memory destination (written via %H0).
(define_insn "sse_movlhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,o")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
            (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
          (parallel [(const_int 0) (const_int 1)
                     (const_int 4) (const_int 5)])))]
  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
  "@
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   movlps\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF,V2SF,V2SF")])
1230
;; Interleave the high halves: result = { op1[2], op2[2], op1[3], op2[3] }.
(define_insn "sse_unpckhps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 2)
                     (const_int 6)
                     (const_int 3)
                     (const_int 7)])))]
  "TARGET_SSE"
  "unpckhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
1243
;; Interleave the low halves: result = { op1[0], op2[0], op1[1], op2[1] }.
(define_insn "sse_unpcklps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0)
                     (const_int 4)
                     (const_int 1)
                     (const_int 5)])))]
  "TARGET_SSE"
  "unpcklps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
1256
;; These are modeled with the same vec_concat as the other shuffles so
;; that users of shufps that can use the new instructions are captured.
;;
;; movshdup: operand 1 is concatenated with itself and indices 1 1 7 7
;; are selected, i.e. result = { op1[1], op1[1], op1[3], op1[3] }.
(define_insn "sse3_movshdup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 1) (const_int 1)
                     (const_int 7) (const_int 7)])))]
  "TARGET_SSE3"
  "movshdup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "V4SF")])
1274
;; movsldup: operand 1 is concatenated with itself and indices 0 0 6 6
;; are selected, i.e. result = { op1[0], op1[0], op1[2], op1[2] }.
(define_insn "sse3_movsldup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 0)
                     (const_int 6) (const_int 6)])))]
  "TARGET_SSE3"
  "movsldup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "V4SF")])
1290
;; Expander taking the shufps immediate as operand 3.  The 8-bit mask is
;; split into four 2-bit selectors; the upper two are biased by 4 so that
;; they index into operand 2 within the op1:op2 concatenation used by
;; sse_shufps_1.
(define_expand "sse_shufps"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand:V4SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE"
{
  int imm = INTVAL (operands[3]);
  rtx sel0 = GEN_INT (imm & 3);
  rtx sel1 = GEN_INT ((imm >> 2) & 3);
  rtx sel2 = GEN_INT (((imm >> 4) & 3) + 4);
  rtx sel3 = GEN_INT (((imm >> 6) & 3) + 4);

  emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
                               sel0, sel1, sel2, sel3));
  DONE;
})
1306
;; shufps with the selection spelled out as a vec_select: operands 3 and 4
;; pick from operand 1 (indices 0-3), operands 5 and 6 pick from operand 2
;; (indices 4-7).  The output code repacks the four selectors into the
;; instruction's 8-bit immediate and stores it back into operands[3].
(define_insn "sse_shufps_1"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")
                     (match_operand 6 "const_4_to_7_operand" "")])))]
  "TARGET_SSE"
{
  int imm = (INTVAL (operands[3])
             | (INTVAL (operands[4]) << 2)
             | ((INTVAL (operands[5]) - 4) << 4)
             | ((INTVAL (operands[6]) - 4) << 6));

  operands[3] = GEN_INT (imm);
  return "shufps\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
1330
;; Extract the high half (elements 2 and 3) of a V4SF as a V2SF.
;; Alternatives: store to memory, register to register, and load from the
;; high part of an offsettable memory operand (%H1).
(define_insn "sse_storehps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 2)
                     (const_int 3)])))]
  "TARGET_SSE"
  "@
   movhps\t{%1, %0|%0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
1343
;; Replace the high half of operand 1 with the V2SF operand 2, keeping
;; elements 0 and 1.  Alternatives: memory source, register source, and
;; offsettable memory destination (written via %H0).
(define_insn "sse_loadhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
        (vec_concat:V4SF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
            (parallel [(const_int 0)
                       (const_int 1)]))
          (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
  "TARGET_SSE"
  "@
   movhps\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movlps\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
1358
;; Extract the low half (elements 0 and 1) of a V4SF as a V2SF.
;; Alternatives: store to memory, register to register (a full movaps),
;; and load from memory.
(define_insn "sse_storelps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
          (parallel [(const_int 0)
                     (const_int 1)])))]
  "TARGET_SSE"
  "@
   movlps\t{%1, %0|%0, %1}
   movaps\t{%1, %0|%0, %1}
   movlps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
1371
;; Replace the low half of operand 1 with the V2SF operand 2, keeping
;; elements 2 and 3.  In the first alternative operand 2 is tied to the
;; destination and a shufps pulls the high half from operand 1; the other
;; two alternatives are movlps from and to memory.
(define_insn "sse_loadlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
            (parallel [(const_int 2)
                       (const_int 3)]))))]
  "TARGET_SSE"
  "@
   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
   movlps\t{%2, %0|%0, %2}
   movlps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "V4SF,V2SF,V2SF")])
1386
;; Merge element 0 of operand 2 into operand 1 (merge mask 1); both
;; inputs must be registers and operand 1 is tied to the destination.
(define_insn "sse_movss"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (match_operand:V4SF 2 "register_operand" "x")
          (match_operand:V4SF 1 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "movss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "SF")])
1397
;; Broadcast an SF that already lives in the destination register to all
;; four elements, using shufps with an all-zero selector on itself.
(define_insn "*vec_dupv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_duplicate:V4SF
          (match_operand:SF 1 "register_operand" "0")))]
  "TARGET_SSE"
  "shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "V4SF")])
1406
;; Build a V2SF from two SF values; operand 2 may be a register or zero.
;; The first two alternatives use SSE registers, the last two MMX ones.
;;
;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
(define_insn "*sse_concatv2sf"
  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
        (vec_concat:V2SF
          (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
          (match_operand:SF 2 "reg_or_0_operand"     " x,C,*y, C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,SF,DI,DI")])
1423
;; Build a V4SF from two V2SF halves.  Operand 1 (the low half) is tied to
;; the destination; operand 2 comes from a register or from memory.
(define_insn "*sse_concatv4sf"
  [(set (match_operand:V4SF 0 "register_operand"       "=x,x")
        (vec_concat:V4SF
          (match_operand:V2SF 1 "register_operand"     " 0,0")
          (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
  "TARGET_SSE"
  "@
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF,V2SF")])
1435
;; V4SF vector initialization; all of the work is done by
;; ix86_expand_vector_init (called with false as its first argument).
(define_expand "vec_initv4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx vals = operands[1];

  ix86_expand_vector_init (false, target, vals);
  DONE;
})
1444
;; Set element 0 of a V4SF from an SF value (merge mask 1).  Operand 1
;; supplies the remaining elements and is either tied to the destination
;; or the "C" constant.  The memory-destination alternative outputs "#",
;; so it has to be split before final output.
(define_insn "vec_setv4sf_0"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"  "=x,x,Yt,m")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 2 "general_operand"     " x,m,*r,x*rfF"))
          (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movd\t{%2, %0|%0, %2}
   #"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "SF")])
1460
;; A subset is vec_setv4sf.
;; Insert an SF into the element named by the one-hot merge mask in
;; operand 3 (1, 2, 4 or 8).  The output code turns that mask into an
;; element index and shifts it left by 4 to form the insertps immediate.
(define_insn "*vec_setv4sf_sse4_1"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 2 "nonimmediate_operand" "xm"))
          (match_operand:V4SF 1 "register_operand" "0")
          (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
  "TARGET_SSE4_1"
{
  int elt = exact_log2 (INTVAL (operands[3]));

  operands[3] = GEN_INT (elt << 4);
  return "insertps\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V4SF")])
1477
;; insertps with an arbitrary 8-bit immediate (operand 3), kept opaque as
;; UNSPEC_INSERTPS.  Operand 1 is tied to the destination and operand 2 is
;; the register source.
;; The stray ';' that followed the output template has been dropped; it
;; only started an empty comment.
(define_insn "sse4_1_insertps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 2 "register_operand" "x")
           (match_operand:V4SF 1 "register_operand" "0")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_INSERTPS))]
  "TARGET_SSE4_1"
  "insertps\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V4SF")])
1489
;; After reload, setting element 0 of a V4SF that lives in memory (with
;; the other elements coming from that same memory) becomes a plain SFmode
;; move to offset 0 of the memory operand.
(define_split
  [(set (match_operand:V4SF 0 "memory_operand" "")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 1 "nonmemory_operand" ""))
          (match_dup 0)
          (const_int 1)))]
  "TARGET_SSE && reload_completed"
  [(const_int 0)]
{
  rtx low_elt = adjust_address (operands[0], SFmode, 0);

  emit_move_insn (low_elt, operands[1]);
  DONE;
})
1503
;; Set one element of a V4SF; operand 2 is the constant element index.
;; All of the work is done by ix86_expand_vector_set.
(define_expand "vec_setv4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:SF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_set (false, operands[0], operands[1], elt);
  DONE;
})
1514
;; Extract element 0 of a V4SF.  The insn always outputs "#" and is split
;; after reload into an SFmode move: a register source is re-created as an
;; SFmode REG with the same register number, anything else goes through
;; gen_lowpart.
(define_insn_and_split "*vec_extractv4sf_0"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
        (vec_select:SF
          (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx src = operands[1];

  src = (REG_P (src)
         ? gen_rtx_REG (SFmode, REGNO (src))
         : gen_lowpart (SFmode, src));
  emit_move_insn (operands[0], src);
  DONE;
})
1533
;; Extract the element selected by the constant operand 2 (0..3) into a
;; general register or memory (SSE4.1 extractps).
(define_insn "*sse4_1_extractps"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
        (vec_select:SF
          (match_operand:V4SF 1 "register_operand" "x")
          (parallel
            [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
  "TARGET_SSE4_1"
  "extractps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V4SF")])
1544
;; Extract one element of a V4SF; operand 2 is the constant element index.
;; All of the work is done by ix86_expand_vector_extract.
(define_expand "vec_extractv4sf"
  [(match_operand:SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_extract (false, operands[0], operands[1], elt);
  DONE;
})
1555
1556 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1557 ;;
1558 ;; Parallel double-precision floating point arithmetic
1559 ;;
1560 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1561
;; V2DF negation; expanded entirely by ix86_expand_fp_absneg_operator.
(define_expand "negv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (neg:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands);
  DONE;
})
1567
;; V2DF absolute value; expanded entirely by
;; ix86_expand_fp_absneg_operator.
(define_expand "absv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (abs:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands);
  DONE;
})
1573
;; Expander for V2DF addition.  The operands are first handed to
;; ix86_fixup_binary_operands_no_copy, then the pattern is emitted.
(define_expand "addv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (plus:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);
})
1580
;; Packed double-precision add (addpd).  Operand 1 is commutative and
;; tied to the destination register.
(define_insn "*addv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (plus:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "%0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
  "addpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
1589
;; Scalar double-precision add (addsd): element 0 of the result is the
;; sum, the other element is copied from operand 1 (merge mask 1).
;; The insn condition now passes V2DFmode to ix86_binary_operator_ok, so
;; it matches the mode of this pattern and the sibling V2DF patterns;
;; it previously passed V4SFmode.
(define_insn "sse2_vmaddv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (plus:V2DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
  "addsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])
1601
;; Expander for V2DF subtraction.  The operands are first handed to
;; ix86_fixup_binary_operands_no_copy, then the pattern is emitted.
(define_expand "subv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (minus:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);
})
1608
;; Packed double-precision subtract (subpd); the minuend must be a
;; register and is tied to the destination.
(define_insn "*subv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (minus:V2DF
          (match_operand:V2DF 1 "register_operand" "0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "subpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
1617
;; Scalar double-precision subtract (subsd): element 0 of the result is
;; the difference, the other element is copied from operand 1.
(define_insn "sse2_vmsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (minus:V2DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "subsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])
1629
;; Expander for V2DF multiplication.  The operands are first handed to
;; ix86_fixup_binary_operands_no_copy, then the pattern is emitted.
(define_expand "mulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (mult:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);
})
1636
;; Packed double-precision multiply (mulpd).  Operand 1 is commutative
;; and tied to the destination register.
(define_insn "*mulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (mult:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "%0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
  "mulpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "V2DF")])
1645
;; Scalar double-precision multiply (mulsd): element 0 of the result is
;; the product, the other element is copied from operand 1.
(define_insn "sse2_vmmulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (mult:V2DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
  "mulsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "DF")])
1657
;; Expander for V2DF division; the dividend must already be a register.
;; The operands are first handed to ix86_fixup_binary_operands_no_copy.
(define_expand "divv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (div:V2DF
          (match_operand:V2DF 1 "register_operand" "")
          (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);
})
1664
;; Packed double-precision divide (divpd); the dividend is tied to the
;; destination register.
(define_insn "*divv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (div:V2DF
          (match_operand:V2DF 1 "register_operand" "0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "divpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "V2DF")])
1673
;; Scalar double-precision divide (divsd): element 0 of the result is
;; the quotient, the other element is copied from operand 1.
(define_insn "sse2_vmdivv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (div:V2DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "divsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "DF")])
1685
;; Packed double-precision square root (sqrtpd).
(define_insn "sqrtv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (sqrt:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "sqrtpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V2DF")])
1693
;; Scalar double-precision square root (sqrtsd): element 0 of the result
;; is sqrt of operand 1, the other element comes from operand 2, which is
;; tied to the destination.
(define_insn "sse2_vmsqrtv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (sqrt:V2DF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2DF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE2"
  "sqrtsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "DF")])
1704
1705 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1706 ;; isn't really correct, as those rtl operators aren't defined when
1707 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
1708
;; Expander for V2DF maximum.  Unless -ffinite-math-only is in effect,
;; operand 1 is forced into a register first: only the *_finite insn
;; accepts a non-register (commutative) first operand.
(define_expand "smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (smax:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (!flag_finite_math_only)
    {
      operands[1] = force_reg (V2DFmode, operands[1]);
    }

  ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
})
1719
;; maxpd when finite-math-only is in effect: the operation is treated as
;; commutative ("%"), so operand 1 may be any nonimmediate operand.
(define_insn "*smaxv2df3_finite"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smax:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "%0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && flag_finite_math_only
   && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
1729
;; General maxpd: not commutative, operand 1 must be a register and is
;; tied to the destination.
(define_insn "*smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smax:V2DF
          (match_operand:V2DF 1 "register_operand" "0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
1738
;; Scalar double-precision maximum (maxsd): element 0 of the result is
;; the maximum, the other element is copied from operand 1.
(define_insn "sse2_vmsmaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (smax:V2DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "maxsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])
1750
;; Expander for V2DF minimum.  Unless -ffinite-math-only is in effect,
;; operand 1 is forced into a register first: only the *_finite insn
;; accepts a non-register (commutative) first operand.
(define_expand "sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (smin:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (!flag_finite_math_only)
    {
      operands[1] = force_reg (V2DFmode, operands[1]);
    }

  ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
})
1761
;; minpd when finite-math-only is in effect: the operation is treated as
;; commutative ("%"), so operand 1 may be any nonimmediate operand.
(define_insn "*sminv2df3_finite"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smin:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "%0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && flag_finite_math_only
   && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
1771
;; General minpd: not commutative, operand 1 must be a register and is
;; tied to the destination.
(define_insn "*sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smin:V2DF
          (match_operand:V2DF 1 "register_operand" "0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
1780
;; Scalar double-precision minimum (minsd): element 0 of the result is
;; the minimum, the other element is copied from operand 1.
(define_insn "sse2_vmsminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (smin:V2DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "minsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])
1792
;; SSE3 addsubpd, written as a merge (mask 1) of the element-wise sum
;; and the element-wise difference of operands 1 and 2.
(define_insn "sse3_addsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (plus:V2DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (minus:V2DF
            (match_dup 1)
            (match_dup 2))
          (const_int 1)))]
  "TARGET_SSE3"
  "addsubpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
1805
;; SSE3 horizontal add: the first half of the result is op1[0] + op1[1],
;; the second half is op2[0] + op2[1].
(define_insn "sse3_haddv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_concat:V2DF
          (plus:DF
            (vec_select:DF
              (match_operand:V2DF 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:DF
              (match_dup 1)
              (parallel [(const_int 1)])))
          (plus:DF
            (vec_select:DF
              (match_operand:V2DF 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0)]))
            (vec_select:DF
              (match_dup 2)
              (parallel [(const_int 1)])))))]
  "TARGET_SSE3"
  "haddpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
1823
;; SSE3 horizontal subtract: the first half of the result is
;; op1[0] - op1[1], the second half is op2[0] - op2[1].
(define_insn "sse3_hsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_concat:V2DF
          (minus:DF
            (vec_select:DF
              (match_operand:V2DF 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:DF
              (match_dup 1)
              (parallel [(const_int 1)])))
          (minus:DF
            (vec_select:DF
              (match_operand:V2DF 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0)]))
            (vec_select:DF
              (match_dup 2)
              (parallel [(const_int 1)])))))]
  "TARGET_SSE3"
  "hsubpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
1841
;; V2DF sum reduction: a single haddpd with operand 1 used as both
;; sources.
(define_expand "reduc_splus_v2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")]
  "TARGET_SSE3"
{
  rtx src = operands[1];

  emit_insn (gen_sse3_haddv2df3 (operands[0], src, src));
  DONE;
})
1850
1851 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1852 ;;
1853 ;; Parallel double-precision floating point comparisons
1854 ;;
1855 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1856
;; Packed double-precision compare.  Operand 3 is the comparison operator
;; itself; it is printed through %D3 to form the middle of the mnemonic.
(define_insn "sse2_maskcmpv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (match_operator:V2DF 3 "sse_comparison_operator"
          [(match_operand:V2DF 1 "register_operand" "0")
           (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
  "TARGET_SSE2"
  "cmp%D3pd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "V2DF")])
1866
;; Scalar (DFmode) variant of the mask compare; same operator handling as
;; the packed pattern but emits the "sd" form.
(define_insn "sse2_maskcmpdf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
        (match_operator:DF 3 "sse_comparison_operator"
          [(match_operand:DF 1 "register_operand" "0")
           (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
  "TARGET_SSE2"
  "cmp%D3sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "DF")])
1876
;; Scalar compare on a V2DF register: element 0 receives the comparison
;; result, element 1 is carried over from operand 1 (vec_merge mask 1).
(define_insn "sse2_vmmaskcmpv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (match_operator:V2DF 3 "sse_comparison_operator"
            [(match_operand:V2DF 1 "register_operand" "0")
             (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "cmp%D3sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "DF")])
1889
;; Compare element 0 of operand 0 with element 0 of operand 1 and set the
;; flags register in CCFPmode.
(define_insn "sse2_comi"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (vec_select:DF
            (match_operand:V2DF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "comisd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "DF")])
1903
;; Same shape as sse2_comi, but the flags are set in CCFPUmode and the
;; "ucomisd" mnemonic is emitted.
(define_insn "sse2_ucomi"
  [(set (reg:CCFPU FLAGS_REG)
        (compare:CCFPU
          (vec_select:DF
            (match_operand:V2DF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "ucomisd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "DF")])
1917
;; Vector conditional select for V2DF.  Operand 3 compares operands 4 and
;; 5; operands 1 and 2 are the two values selected between.  All the work
;; is delegated to ix86_expand_fp_vcond; the expansion fails when that
;; helper returns false.
(define_expand "vcondv2df"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (if_then_else:V2DF
          (match_operator 3 ""
            [(match_operand:V2DF 4 "nonimmediate_operand" "")
             (match_operand:V2DF 5 "nonimmediate_operand" "")])
          (match_operand:V2DF 1 "general_operand" "")
          (match_operand:V2DF 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (!ix86_expand_fp_vcond (operands))
    FAIL;
  DONE;
})
1933
1934 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1935 ;;
1936 ;; Parallel double-precision floating point logical operations
1937 ;;
1938 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1939
;; Named expander for V2DF bitwise AND.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the insn is generated.
(define_expand "andv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (and:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);
})
1946
;; V2DF bitwise AND.  Operands 1 and 2 are commutative ("%"); operand 1 is
;; tied to the destination.
(define_insn "*andv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (and:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "%0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
  "andpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
1955
;; V2DF and-not: (~op1) & op2.  Not commutative, so operand 1 must be the
;; register tied to the destination.
(define_insn "sse2_nandv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (and:V2DF
          (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "andnpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
1964
;; Named expander for V2DF bitwise inclusive OR.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy before the insn is generated.
(define_expand "iorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (ior:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);
})
1971
;; V2DF bitwise inclusive OR.  Operands 1 and 2 are commutative ("%");
;; operand 1 is tied to the destination.
(define_insn "*iorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (ior:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "%0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
  "orpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
1980
;; Named expander for V2DF bitwise exclusive OR.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy before the insn is generated.
(define_expand "xorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (xor:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);
})
1987
;; V2DF bitwise exclusive OR.  Operands 1 and 2 are commutative ("%");
;; operand 1 is tied to the destination.
(define_insn "*xorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (xor:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "%0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
  "xorpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
1996
1997 ;; Also define scalar versions. These are used for abs, neg, and
1998 ;; conditional move. Using subregs into vector modes causes register
1999 ;; allocation lossage. These patterns do not allow memory operands
2000 ;; because the native instructions read the full 128-bits.
2001
;; Scalar DFmode AND implemented with the packed instruction; both inputs
;; must be registers.
(define_insn "*anddf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
        (and:DF
          (match_operand:DF 1 "register_operand" "0")
          (match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "andpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
2010
;; Scalar DFmode and-not, (~op1) & op2, implemented with the packed
;; instruction; both inputs must be registers.
(define_insn "*nanddf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
        (and:DF
          (not:DF (match_operand:DF 1 "register_operand" "0"))
          (match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "andnpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
2019
;; Scalar DFmode inclusive OR implemented with the packed instruction;
;; both inputs must be registers.
(define_insn "*iordf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
        (ior:DF
          (match_operand:DF 1 "register_operand" "0")
          (match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "orpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
2028
;; Scalar DFmode exclusive OR implemented with the packed instruction;
;; both inputs must be registers.
(define_insn "*xordf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
        (xor:DF
          (match_operand:DF 1 "register_operand" "0")
          (match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "xorpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
2037
2038 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2039 ;;
2040 ;; Parallel double-precision floating point conversion operations
2041 ;;
2042 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2043
;; Convert two SImode integers (V2SI) to V2DF.  Alternative 0 takes the
;; source from a "y" register, alternative 1 from memory; only the first
;; is tagged with the "mmx" unit.
(define_insn "sse2_cvtpi2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (float:V2DF
          (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
  "TARGET_SSE2"
  "cvtpi2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx,*")
   (set_attr "mode" "V2DF")])
2052
;; Convert V2DF to V2SI in a "y" register.  The conversion is wrapped in
;; UNSPEC_FIX_NOTRUNC rather than expressed as a plain (fix ...).
(define_insn "sse2_cvtpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (unspec:V2SI
          [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "DI")])
2063
;; Convert V2DF to V2SI in a "y" register, expressed as a plain (fix ...).
(define_insn "sse2_cvttpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (fix:V2SI
          (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvttpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2073
;; Convert an SImode integer (operand 2, general register or memory) to
;; DFmode and place it in element 0; element 1 comes from operand 1, which
;; is tied to the destination.
(define_insn "sse2_cvtsi2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_merge:V2DF
          (vec_duplicate:V2DF
            (float:DF
              (match_operand:SI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V2DF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2"
  "cvtsi2sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "DF")
   (set_attr "athlon_decode" "double,direct")
   (set_attr "amdfam10_decode" "vector,double")])
2087
;; DImode-source variant of sse2_cvtsi2sd; only enabled for TARGET_64BIT.
(define_insn "sse2_cvtsi2sdq"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_merge:V2DF
          (vec_duplicate:V2DF
            (float:DF
              (match_operand:DI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V2DF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvtsi2sdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "DF")
   (set_attr "athlon_decode" "double,direct")
   (set_attr "amdfam10_decode" "vector,double")])
2101
;; Convert element 0 of a V2DF operand to SImode in a general register.
;; The conversion is wrapped in UNSPEC_FIX_NOTRUNC.
(define_insn "sse2_cvtsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(vec_select:DF
             (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "SI")])
2115
;; Same conversion as sse2_cvtsd2si, but the source is a plain DFmode
;; operand instead of element 0 of a V2DF vector.
(define_insn "sse2_cvtsd2si_2"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "SI")])
2127
;; DImode-result variant of sse2_cvtsd2si; only enabled for TARGET_64BIT.
(define_insn "sse2_cvtsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(vec_select:DF
             (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvtsd2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "DI")])
2141
;; DImode-result variant of sse2_cvtsd2si_2 (plain DFmode source); only
;; enabled for TARGET_64BIT.
(define_insn "sse2_cvtsd2siq_2"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvtsd2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "DI")])
2153
;; Convert element 0 of a V2DF operand to SImode, expressed as a plain
;; (fix ...) of the selected element.
(define_insn "sse2_cvttsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (fix:SI
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "cvttsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "SI")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")])
2167
;; DImode-result variant of sse2_cvttsd2si; only enabled for TARGET_64BIT.
(define_insn "sse2_cvttsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (fix:DI
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvttsd2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "DI")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")])
2181
;; Convert elements 0 and 1 of a V4SI operand to V2DF.
(define_insn "sse2_cvtdq2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2"
  "cvtdq2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])
2192
;; Expander for the V2DF -> V4SI conversion (UNSPEC_FIX_NOTRUNC form).
;; The upper half of the result is a V2SI zero vector, supplied here as
;; operand 2.
(define_expand "sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
          (unspec:V2SI
            [(match_operand:V2DF 1 "nonimmediate_operand" "")]
            UNSPEC_FIX_NOTRUNC)
          (match_dup 2)))]
  "TARGET_SSE2"
{
  operands[2] = CONST0_RTX (V2SImode);
})
2201
;; Insn matching the RTL produced by the sse2_cvtpd2dq expander: converted
;; V2SI in the low half, constant zero (operand 2) in the high half.
(define_insn "*sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (unspec:V2SI
            [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
            UNSPEC_FIX_NOTRUNC)
          (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
  "cvtpd2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "TI")
   (set_attr "amdfam10_decode" "double")])
2214
;; Expander for the V2DF -> V4SI conversion in its plain (fix ...) form.
;; The upper half of the result is a V2SI zero vector, supplied here as
;; operand 2.
(define_expand "sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
          (fix:V2SI
            (match_operand:V2DF 1 "nonimmediate_operand" ""))
          (match_dup 2)))]
  "TARGET_SSE2"
{
  operands[2] = CONST0_RTX (V2SImode);
})
2222
;; Insn matching the RTL produced by the sse2_cvttpd2dq expander:
;; converted V2SI in the low half, constant zero (operand 2) in the high
;; half.
(define_insn "*sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (fix:V2SI
            (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
  "cvttpd2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "TI")
   (set_attr "amdfam10_decode" "double")])
2234
;; Narrow operand 2 (V2DF) to single precision and merge the result into
;; element 0 of operand 1 (V4SF, tied to the destination); the remaining
;; elements come from operand 1 (vec_merge mask 1).
(define_insn "sse2_cvtsd2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float_truncate:V2SF
              (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2"
  "cvtsd2ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "amdfam10_decode" "vector,double")
   (set_attr "mode" "SF")])
2249
;; Widen the low two floats of operand 2 (V4SF) to double and merge
;; element 0 of that into operand 1 (V2DF, tied to the destination);
;; element 1 comes from operand 1 (vec_merge mask 1).
(define_insn "sse2_cvtss2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_merge:V2DF
          (float_extend:V2DF
            (vec_select:V2SF
              (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
              (parallel [(const_int 0)
                         (const_int 1)])))
          (match_operand:V2DF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2"
  "cvtss2sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "vector,double")
   (set_attr "mode" "DF")])
2264
;; Expander for the V2DF -> V4SF narrowing conversion.  The upper half of
;; the result is a V2SF zero vector, supplied here as operand 2.
;; Operand 1 carries an empty constraint like every other expander in
;; this file: constraints have no effect in a define_expand, so the stray
;; "xm" that used to be here was only misleading.
(define_expand "sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand" ""))
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SFmode);")
2273
;; Insn matching the RTL produced by the sse2_cvtpd2ps expander: narrowed
;; V2SF in the low half, constant zero (operand 2) in the high half.
(define_insn "*sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2SF 2 "const0_operand" "")))]
  "TARGET_SSE2"
  "cvtpd2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "V4SF")
   (set_attr "amdfam10_decode" "double")])
2286
;; Widen elements 0 and 1 of a V4SF operand to V2DF.
(define_insn "sse2_cvtps2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2"
  "cvtps2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")
   (set_attr "amdfam10_decode" "direct")])
2298
;; Widen the high two floats of operand 1 to V2DF.  Two insns are
;; generated:
;;   1. a shuffle of (temp ++ operand 1) selecting elements 6,7,2,3, which
;;      puts elements 2 and 3 of operand 1 into elements 0 and 1 of temp;
;;   2. a float_extend of elements 0 and 1 of temp.
;; Operand 2 is the fresh V4SF temporary.
(define_expand "vec_unpacks_hi_v4sf"
  [(set (match_dup 2)
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_dup 2)
            (match_operand:V4SF 1 "nonimmediate_operand" ""))
          (parallel [(const_int 6) (const_int 7)
                     (const_int 2) (const_int 3)])))
   (set (match_operand:V2DF 0 "register_operand" "")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_dup 2)
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "operands[2] = gen_reg_rtx (V4SFmode);")
2318
;; Widen the low two floats of operand 1 to V2DF.  No preparation code is
;; needed; the template is emitted as is.
(define_expand "vec_unpacks_lo_v4sf"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2")
2326
;; V8HI -> V4SF via a V4SI intermediate: vec_unpacks_hi_v8hi into a fresh
;; V4SI register, then sse2_cvtdq2ps on that register.
(define_expand "vec_unpacks_float_hi_v8hi"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (V4SImode);

  /* Step 1: unpack to V4SI.  Step 2: integer to float.  */
  emit_insn (gen_vec_unpacks_hi_v8hi (widened, operands[1]));
  emit_insn (gen_sse2_cvtdq2ps (operands[0], widened));
  DONE;
})
2338
;; V8HI -> V4SF via a V4SI intermediate: vec_unpacks_lo_v8hi into a fresh
;; V4SI register, then sse2_cvtdq2ps on that register.
(define_expand "vec_unpacks_float_lo_v8hi"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (V4SImode);

  /* Step 1: unpack to V4SI.  Step 2: integer to float.  */
  emit_insn (gen_vec_unpacks_lo_v8hi (widened, operands[1]));
  emit_insn (gen_sse2_cvtdq2ps (operands[0], widened));
  DONE;
})
2350
;; V8HI -> V4SF via a V4SI intermediate: vec_unpacku_hi_v8hi into a fresh
;; V4SI register, then sse2_cvtdq2ps on that register.
(define_expand "vec_unpacku_float_hi_v8hi"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (V4SImode);

  /* Step 1: unpack to V4SI.  Step 2: integer to float.  */
  emit_insn (gen_vec_unpacku_hi_v8hi (widened, operands[1]));
  emit_insn (gen_sse2_cvtdq2ps (operands[0], widened));
  DONE;
})
2362
;; V8HI -> V4SF via a V4SI intermediate: vec_unpacku_lo_v8hi into a fresh
;; V4SI register, then sse2_cvtdq2ps on that register.
(define_expand "vec_unpacku_float_lo_v8hi"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (V4SImode);

  /* Step 1: unpack to V4SI.  Step 2: integer to float.  */
  emit_insn (gen_vec_unpacku_lo_v8hi (widened, operands[1]));
  emit_insn (gen_sse2_cvtdq2ps (operands[0], widened));
  DONE;
})
2374
;; Convert the high two integers of operand 1 (V4SI) to V2DF.  Two insns
;; are generated:
;;   1. a shuffle selecting elements 2,3,2,3 of operand 1 into temp;
;;   2. a (float ...) of elements 0 and 1 of temp.
;; Operand 2 is the fresh V4SI temporary.
(define_expand "vec_unpacks_float_hi_v4si"
  [(set (match_dup 2)
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "")
          (parallel [(const_int 2) (const_int 3)
                     (const_int 2) (const_int 3)])))
   (set (match_operand:V2DF 0 "register_operand" "")
        (float:V2DF
          (vec_select:V2SI
            (match_dup 2)
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "operands[2] = gen_reg_rtx (V4SImode);")
2392
;; Convert the low two integers of operand 1 (V4SI) to V2DF.  No
;; preparation code is needed; the template is emitted as is.
(define_expand "vec_unpacks_float_lo_v4si"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "")
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2")
2400
;; Pack two V2DF vectors into one V4SF: each input is narrowed with
;; sse2_cvtpd2ps into its own V4SF temporary, and the two temporaries are
;; then combined with sse_movlhps.
(define_expand "vec_pack_trunc_v2df"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  rtx first = gen_reg_rtx (V4SFmode);
  rtx second = gen_reg_rtx (V4SFmode);

  emit_insn (gen_sse2_cvtpd2ps (first, operands[1]));
  emit_insn (gen_sse2_cvtpd2ps (second, operands[2]));
  emit_insn (gen_sse_movlhps (operands[0], first, second));
  DONE;
})
2417
;; Pack two V2DF vectors into one V4SI: each input is converted with
;; sse2_cvttpd2dq into its own V4SI temporary, and the two temporaries are
;; then combined with sse2_punpcklqdq, viewing all three registers as
;; V2DI.
(define_expand "vec_pack_sfix_trunc_v2df"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  rtx first = gen_reg_rtx (V4SImode);
  rtx second = gen_reg_rtx (V4SImode);

  emit_insn (gen_sse2_cvttpd2dq (first, operands[1]));
  emit_insn (gen_sse2_cvttpd2dq (second, operands[2]));
  emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
                                  gen_lowpart (V2DImode, first),
                                  gen_lowpart (V2DImode, second)));
  DONE;
})
2436
;; Pack two V2DF vectors into one V4SI: each input is converted with
;; sse2_cvtpd2dq into its own V4SI temporary, and the two temporaries are
;; then combined with sse2_punpcklqdq, viewing all three registers as
;; V2DI.
(define_expand "vec_pack_sfix_v2df"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  rtx first = gen_reg_rtx (V4SImode);
  rtx second = gen_reg_rtx (V4SImode);

  emit_insn (gen_sse2_cvtpd2dq (first, operands[1]));
  emit_insn (gen_sse2_cvtpd2dq (second, operands[2]));
  emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
                                  gen_lowpart (V2DImode, first),
                                  gen_lowpart (V2DImode, second)));
  DONE;
})
2455
2456
2457 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2458 ;;
2459 ;; Parallel double-precision floating point element swizzling
2460 ;;
2461 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2462
;; Result = { op1[1], op2[1] }.
;;   alt 0: register form, operand 1 tied to the destination.
;;   alt 1: operand 1 is offsettable memory, operand 2 tied to the
;;          destination; only the low element is loaded, from %H1.
;;   alt 2: destination is memory with operand 2 tied to it; only the
;;          element taken from operand 1 is stored.
;; The insn condition rejects operands 1 and 2 both being memory.
(define_insn "sse2_unpckhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
          (parallel [(const_int 1) (const_int 3)])))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   unpckhpd\t{%2, %0|%0, %2}
   movlpd\t{%H1, %0|%0, %H1}
   movhpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "V2DF,V1DF,V1DF")])
2478
;; Result = { op1[0], op1[0] }.
;;   alt 0: register destination, emitted as movddup.
;;   alt 1: memory destination from a register; output is "#", i.e. the
;;          insn has to be split.
;; The insn condition rejects a memory-to-memory form.
(define_insn "*sse3_movddup"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movddup\t{%1, %0|%0, %1}
   #"
  [(set_attr "type" "sselog1,ssemov")
   (set_attr "mode" "V2DF")])
2493
;; After reload, split a register-to-memory { op1[0], op1[0] } store into
;; two DFmode stores of the same register, at byte offsets 0 and 8 of the
;; destination.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "register_operand" "")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_SSE3 && reload_completed"
  [(const_int 0)]
{
  /* DFmode view of the source hard register.  */
  rtx elt = gen_rtx_REG (DFmode, REGNO (operands[1]));
  rtx dest = operands[0];

  emit_move_insn (adjust_address (dest, DFmode, 0), elt);
  emit_move_insn (adjust_address (dest, DFmode, 8), elt);
  DONE;
})
2510
;; Result = { op1[0], op2[0] }.  Operand 1 is tied to the destination in
;; every alternative.
;;   alt 0: operand 2 in a register.
;;   alt 1: operand 2 in memory, loaded into the high element.
;;   alt 2: destination is offsettable memory; operand 2 is stored to %H0.
;; The insn condition rejects operands 1 and 2 both being memory.
(define_insn "sse2_unpcklpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   movhpd\t{%2, %0|%0, %2}
   movlpd\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "V2DF,V1DF,V1DF")])
2526
;; Expander taking the 2-bit shuffle immediate as operand 3.  The
;; immediate is decoded into the two element indices expected by
;; sse2_shufpd_1: bit 0 picks index 0 or 1, bit 1 picks index 2 or 3.
(define_expand "sse2_shufpd"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[3]);
  rtx sel_lo = GEN_INT (imm & 1);
  rtx sel_hi = GEN_INT ((imm & 2) ? 3 : 2);

  emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
                                sel_lo, sel_hi));
  DONE;
})
2540
;; Two-element select from the concatenation of operands 1 and 2.
;; Operand 3 (0 or 1) indexes into operand 1, operand 4 (2 or 3) into
;; operand 2.  The output code folds both indices back into the 2-bit
;; immediate and overwrites operands[3] with it for printing.
(define_insn "sse2_shufpd_1"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_1_operand" "")
                     (match_operand 4 "const_2_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  /* Bit 0 comes from operand 3; bit 1 from operand 4 rebased to 0..1.  */
  int imm = INTVAL (operands[3]) | ((INTVAL (operands[4]) - 2) << 1);

  operands[3] = GEN_INT (imm);
  return "shufpd\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
2560
;; Extract element 1 of a V2DF operand as DFmode.
;;   alt 0: store to memory from a register.
;;   alt 1: register to the same register (operand 1 tied to operand 0).
;;   alt 2: offsettable memory source; output is "#", i.e. the insn has to
;;          be split.
;; The insn condition rejects a memory-to-memory form.
(define_insn "sse2_storehpd"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhpd\t{%1, %0|%0, %1}
   unpckhpd\t%0, %0
   #"
  [(set_attr "type" "ssemov,sselog1,ssemov")
   (set_attr "mode" "V1DF,V2DF,DF")])
2573
;; After reload, extracting element 1 from a V2DF in memory becomes a
;; plain DFmode load from byte offset 8 of that memory.
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
        (vec_select:DF
          (match_operand:V2DF 1 "memory_operand" "")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  "operands[1] = adjust_address (operands[1], DFmode, 8);")
2584
;; Extract element 0 of a V2DF operand as DFmode.
;;   alt 0: store to memory from a register.
;;   alt 1, alt 2: register destination; output is "#", i.e. the insn has
;;          to be split.
;; The insn condition rejects a memory-to-memory form.
(define_insn "sse2_storelpd"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movlpd\t{%1, %0|%0, %1}
   #
   #"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V1DF,DF,DF")])
2597
;; After reload, extracting element 0 into a register becomes a plain
;; DFmode move.  A register source is re-created as a DFmode hard
;; register; any other source goes through gen_lowpart.
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE2 && reload_completed"
  [(const_int 0)]
{
  rtx src = (REG_P (operands[1])
             ? gen_rtx_REG (DFmode, REGNO (operands[1]))
             : gen_lowpart (DFmode, operands[1]));

  emit_move_insn (operands[0], src);
  DONE;
})
2614
;; Result = { op1[0], op2 }: replace the high element of operand 1 with
;; the DFmode operand 2.  Operand 1 is tied to the destination in every
;; alternative.
;;   alt 0: operand 2 in memory, loaded into the high element.
;;   alt 1: operand 2 in a register.
;;   alt 2: destination is offsettable memory; output is "#", i.e. the
;;          insn has to be split.
;; The insn condition rejects operands 1 and 2 both being memory.
;;
;; There used to be a fourth alternative with operand 2 tied to the
;; destination and operand 1 in another register, emitted as
;; "shufpd $1, %1, %0".  SHUFPD always takes the low result element from
;; the destination register and the high one from the source, so that
;; form produced { dest[1], op1[0] } and lost operand 2 altogether.  No
;; immediate can express the required result with that register
;; assignment, so the alternative is removed; the register allocator
;; falls back to alt 1 with a copy.
(define_insn "sse2_loadhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
        (vec_concat:V2DF
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
            (parallel [(const_int 0)]))
          (match_operand:DF 2 "nonimmediate_operand" " m,x,x*fr")))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhpd\t{%2, %0|%0, %2}
   unpcklpd\t{%2, %0|%0, %2}
   #"
  [(set_attr "type" "ssemov,sselog,other")
   (set_attr "mode" "V1DF,V2DF,DF")])
2630
;; After reload, replacing the high element of a V2DF in memory becomes a
;; plain DFmode store of operand 1 at byte offset 8 of that memory.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_concat:V2DF
          (vec_select:DF
            (match_dup 0)
            (parallel [(const_int 0)]))
          (match_operand:DF 1 "register_operand" "")))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  "operands[0] = adjust_address (operands[0], DFmode, 8);")
2641
;; Result = { op2, op1[1] }: replace the low element of operand 1 with the
;; DFmode operand 2.
;;   alt 0: operand 1 is the constant "C", operand 2 loaded from memory.
;;   alt 1: operand 1 tied to the destination, operand 2 from memory.
;;   alt 2: operand 1 tied to the destination, operand 2 in a register.
;;   alt 3: operand 2 tied to the destination, operand 1 in a register.
;;   alt 4: operand 2 tied to the destination, operand 1 in offsettable
;;          memory; only its high element is loaded, from %H1.
;;   alt 5: destination is memory; output is "#", i.e. the insn has to be
;;          split.
;; The insn condition rejects operands 1 and 2 both being memory.
;;
;; In alt 3 the destination already holds operand 2 in its low element, so
;; the shuffle has to fetch the high element from operand 1.  SHUFPD takes
;; the high result element from its source operand, which must therefore
;; be %1; naming %2 (the destination itself) left the high element
;; unchanged instead of taking it from operand 1.
(define_insn "sse2_loadlpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
        (vec_concat:V2DF
          (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
          (vec_select:DF
            (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
            (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movsd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhpd\t{%H1, %0|%0, %H1}
   #"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
2659
;; After reload, replacing the LOW element of a V2DF in memory becomes a
;; plain DFmode store of operand 1.  The pattern keeps element 1 of the
;; memory vector and overwrites element 0, so the store goes to byte
;; offset 0.  (Offset 8 would overwrite the high element, which the
;; pattern says is preserved, and leave the low element stale.)
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_concat:V2DF
          (match_operand:DF 1 "register_operand" "")
          (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[0] = adjust_address (operands[0], DFmode, 0);
})
2670
2671 ;; Not sure these two are ever used, but it doesn't hurt to have
2672 ;; them. -aoliva
;; Extract element 1 of a V2DF operand when SSE is available but SSE2 is
;; not, using single-precision move instructions.
;;   alt 0: store to memory from a register.
;;   alt 1: register to register.
;;   alt 2: load from offsettable memory at %H1.
;; The insn condition rejects a memory-to-memory form.
(define_insn "*vec_extractv2df_1_sse"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 1)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
2686
;; Extract element 0 of a V2DF operand when SSE is available but SSE2 is
;; not, using single-precision move instructions.
;;   alt 0: store to memory from a register.
;;   alt 1: register to register.
;;   alt 2: load from memory.
;; The insn condition rejects a memory-to-memory form.
(define_insn "*vec_extractv2df_0_sse"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
          (parallel [(const_int 0)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movlps\t{%1, %0|%0, %1}
   movaps\t{%1, %0|%0, %1}
   movlps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
2700
;; Result = { op2[0], op1[1] } (vec_merge mask 1: element 0 from operand
;; 2, element 1 from operand 1).
;;   alt 0: operand 1 tied to the destination, operand 2 in a register.
;;   alt 1: operand 1 tied to the destination, operand 2 in memory.
;;   alt 2: destination is memory (operand 1 tied to it); operand 2 is
;;          stored from a register.
;;   alt 3: operand 2 tied to the destination, operand 1 in a register.
;;   alt 4: operand 2 tied to the destination, operand 1 in offsettable
;;          memory; only its high element is loaded, from %H1.
;;   alt 5: destination is offsettable memory (operand 2 tied to it); the
;;          element taken from operand 1 is stored to %H0.
;;
;; In alt 3 the destination already holds operand 2, so the shuffle has to
;; fetch the high element from operand 1.  SHUFPD takes the high result
;; element from its source operand, which must therefore be %1; naming %2
;; (the destination itself) left the high element unchanged instead of
;; taking it from operand 1.
(define_insn "sse2_movsd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
        (vec_merge:V2DF
          (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
          (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhps\t{%H1, %0|%0, %H1}
   movhps\t{%1, %H0|%H0, %1}"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
2717
;; Broadcast a DFmode value (register or memory) into both elements of a
;; V2DF register; SSE3 form.
(define_insn "*vec_dupv2df_sse3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_duplicate:V2DF
          (match_operand:DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE3"
  "movddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "DF")])
2726
;; Broadcast a DFmode value into both elements of a V2DF register; SSE2
;; form.  The source must already be in the destination register, which is
;; then unpacked with itself.
(define_insn "*vec_dupv2df"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_duplicate:V2DF
          (match_operand:DF 1 "register_operand" "0")))]
  "TARGET_SSE2"
  "unpcklpd\t%0, %0"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "V2DF")])
2735
;; Concatenation of a DFmode value with itself, i.e. the same broadcast as
;; *vec_dupv2df_sse3 written as a vec_concat.
(define_insn "*vec_concatv2df_sse3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_concat:V2DF
          (match_operand:DF 1 "nonimmediate_operand" "xm")
          (match_dup 1)))]
  "TARGET_SSE3"
  "movddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "DF")])
2745
;; Build a V2DF from two DFmode values: result = { op1, op2 }.
;;   alt 0: both in registers, operand 1 tied to the destination.
;;   alt 1: operand 2 from memory, operand 1 tied to the destination.
;;   alt 2: operand 1 from memory, operand 2 is the constant "C".
;;   alt 3, alt 4: same shapes as alt 0 and alt 1 on plain "x" registers,
;;          emitted with single-precision move instructions.
;; The first three alternatives use the "Yt" register constraint.
(define_insn "*vec_concatv2df"
  [(set (match_operand:V2DF 0 "register_operand" "=Yt,Yt,Yt,x,x")
        (vec_concat:V2DF
          (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
          (match_operand:DF 2 "vector_move_operand" " Yt,m ,C ,x,m")))]
  "TARGET_SSE"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   movhpd\t{%2, %0|%0, %2}
   movsd\t{%1, %0|%0, %1}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
   (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
2760
;; Set one element of a V2DF register.  Operand 2 is the constant element
;; index; the work is done by ix86_expand_vector_set.
(define_expand "vec_setv2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:DF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  int elt = INTVAL (operands[2]);

  ix86_expand_vector_set (false, operands[0], operands[1], elt);
  DONE;
})
2771
;; Extract one element of a V2DF register.  Operand 2 is the constant
;; element index; the work is done by ix86_expand_vector_extract.
(define_expand "vec_extractv2df"
  [(match_operand:DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  int elt = INTVAL (operands[2]);

  ix86_expand_vector_extract (false, operands[0], operands[1], elt);
  DONE;
})
2782
;; Initialise a V2DF register from operand 1 (no predicate or mode is
;; imposed on it here); the work is done by ix86_expand_vector_init.
(define_expand "vec_initv2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx vals = operands[1];

  ix86_expand_vector_init (false, target, vals);
  DONE;
})
2791
2792 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2793 ;;
2794 ;; Parallel integral arithmetic
2795 ;;
2796 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2797
;; Integer vector negation, expressed as (0 - operand 1).  Operand 2 is
;; not supplied by the caller; the preparation code sets it to the zero
;; vector, forced into a register.
(define_expand "neg<mode>2"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (minus:SSEMODEI
          (match_dup 2)
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
})
2805
;; Element-wise integer vector addition for all 16-byte integer modes.
;; Before the PLUS pattern is emitted the operands are passed through
;; ix86_fixup_binary_operands_no_copy (presumably so that the matching
;; insn's ix86_binary_operator_ok condition holds -- confirm in i386.c).
(define_expand "add<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (plus:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
})
2812
;; Matcher for integer vector addition; emits padd{b,w,d,q} according
;; to the element size.  "%" marks operands 1 and 2 as commutative and
;; operand 1 is tied to the destination.
(define_insn "*add<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (plus:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
  "padd<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2823
;; Signed saturating addition (ss_plus) on byte and word vectors;
;; emits padds{b,w}.  Operands 1 and 2 are commutative ("%") and
;; operand 1 is tied to the destination.
(define_insn "sse2_ssadd<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (ss_plus:SSEMODE12
          (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
  "padds<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2834
;; Unsigned saturating addition (us_plus) on byte and word vectors;
;; emits paddus{b,w}.  Operands 1 and 2 are commutative ("%") and
;; operand 1 is tied to the destination.
(define_insn "sse2_usadd<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (us_plus:SSEMODE12
          (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
  "paddus<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2845
;; Element-wise integer vector subtraction for all 16-byte integer
;; modes.  The minuend (operand 1) must be a register; the subtrahend
;; may also be memory.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the MINUS pattern is emitted.
(define_expand "sub<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (minus:SSEMODEI
          (match_operand:SSEMODEI 1 "register_operand" "")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);
})
2852
;; Matcher for integer vector subtraction; emits psub{b,w,d,q}.
;; Subtraction is not commutative, so operand 1 is simply tied to the
;; destination register with no "%" marker.
(define_insn "*sub<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (minus:SSEMODEI
          (match_operand:SSEMODEI 1 "register_operand" "0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psub<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2863
;; Signed saturating subtraction (ss_minus) on byte and word vectors;
;; emits psubs{b,w}.  Operand 1 is tied to the destination register.
(define_insn "sse2_sssub<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (ss_minus:SSEMODE12
          (match_operand:SSEMODE12 1 "register_operand" "0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psubs<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2874
;; Unsigned saturating subtraction (us_minus) on byte and word vectors;
;; emits psubus{b,w}.  Operand 1 is tied to the destination register.
(define_insn "sse2_ussub<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (us_minus:SSEMODE12
          (match_operand:SSEMODE12 1 "register_operand" "0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psubus<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2885
;; V16QI multiply, synthesized from word multiplies: the bytes are
;; spread into 16-bit words, multiplied with mulv8hi3, and the low byte
;; of each product is then shuffled back into byte order with a cascade
;; of punpck{l,h}bw instructions.
(define_expand "mulv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
                    (match_operand:V16QI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx tmp[12];
  rtx dest = operands[0];
  int k;

  for (k = 0; k < 12; k++)
    tmp[k] = gen_reg_rtx (V16QImode);

  /* Interleave each input with itself so that the low byte of every
     word holds one source byte.  The high byte of each word is a
     don't-care; a copy of the low byte is simply the most convenient
     thing to leave there.  */
  emit_insn (gen_sse2_punpckhbw (tmp[0], operands[1], operands[1]));
  emit_insn (gen_sse2_punpckhbw (tmp[1], operands[2], operands[2]));
  emit_insn (gen_sse2_punpcklbw (tmp[2], operands[1], operands[1]));
  emit_insn (gen_sse2_punpcklbw (tmp[3], operands[2], operands[2]));

  /* Word multiplies.  In the pictures below a dot is a don't-care byte
     and the letters are result bytes, A being the most significant.
       tmp[4] = .A.B.C.D.E.F.G.H
       tmp[5] = .I.J.K.L.M.N.O.P  */
  emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, tmp[4]),
                           gen_lowpart (V8HImode, tmp[0]),
                           gen_lowpart (V8HImode, tmp[1])));
  emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, tmp[5]),
                           gen_lowpart (V8HImode, tmp[2]),
                           gen_lowpart (V8HImode, tmp[3])));

  /* Pull out the wanted bytes and merge them.  Every round interleaves
     the two most recent temporaries, high halves first:
       tmp[6]  = ..AI..BJ..CK..DL    tmp[7]  = ..EM..FN..GO..HP
       tmp[8]  = ....AEIM....BFJN    tmp[9]  = ....CGKO....DHLP
       tmp[10] = ........ACEGIKMO    tmp[11] = ........BDFHJLNP  */
  for (k = 6; k < 12; k += 2)
    {
      emit_insn (gen_sse2_punpckhbw (tmp[k], tmp[k - 1], tmp[k - 2]));
      emit_insn (gen_sse2_punpcklbw (tmp[k + 1], tmp[k - 1], tmp[k - 2]));
    }

  /* Final interleave: ABCDEFGHIJKLMNOP.  */
  emit_insn (gen_sse2_punpcklbw (dest, tmp[11], tmp[10]));
  DONE;
})
2929
;; V8HI multiply keeping the low 16 bits of each product.  The operands
;; are passed through ix86_fixup_binary_operands_no_copy before the MULT
;; pattern is emitted.
(define_expand "mulv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (mult:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "")
          (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);
})
2936
;; Matcher for the V8HI low-part multiply; emits pmullw.  Operands 1
;; and 2 are commutative ("%"); operand 1 is tied to the destination.
(define_insn "*mulv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (mult:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "%0")
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmullw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2946
;; Signed V8HI high-part multiply: sign-extend both inputs to V8SI,
;; multiply, shift right by 16 and truncate back to V8HI.  The operands
;; are passed through ix86_fixup_binary_operands_no_copy first.
(define_expand "smulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (sign_extend:V8SI
                (match_operand:V8HI 1 "nonimmediate_operand" ""))
              (sign_extend:V8SI
                (match_operand:V8HI 2 "nonimmediate_operand" "")))
            (const_int 16))))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);
})
2959
;; Matcher for the signed V8HI high-part multiply; emits pmulhw.
;; Operands 1 and 2 are commutative ("%"); operand 1 is tied to the
;; destination.
(define_insn "*smulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (sign_extend:V8SI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
              (sign_extend:V8SI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
            (const_int 16))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2975
;; Unsigned V8HI high-part multiply: zero-extend both inputs to V8SI,
;; multiply, shift right by 16 and truncate back to V8HI.  The operands
;; are passed through ix86_fixup_binary_operands_no_copy first.
(define_expand "umulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (zero_extend:V8SI
                (match_operand:V8HI 1 "nonimmediate_operand" ""))
              (zero_extend:V8SI
                (match_operand:V8HI 2 "nonimmediate_operand" "")))
            (const_int 16))))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);
})
2988
;; Matcher for the unsigned V8HI high-part multiply; emits pmulhuw.
;; Operands 1 and 2 are commutative ("%"); operand 1 is tied to the
;; destination.
(define_insn "*umulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (zero_extend:V8SI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
              (zero_extend:V8SI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
            (const_int 16))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhuw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3004
;; Unsigned widening multiply (pmuludq): elements 0 and 2 of each V4SI
;; input are zero-extended to 64 bits and multiplied, giving a V2DI
;; result.  Operands 1 and 2 are commutative ("%"); operand 1 is tied
;; to the destination.
(define_insn "sse2_umulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (mult:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "%0")
              (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "pmuludq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3021
;; Signed widening multiply (pmuldq, SSE4.1): elements 0 and 2 of each
;; V4SI input are sign-extended to 64 bits and multiplied, giving a V2DI
;; result.  Operands 1 and 2 are commutative ("%"); operand 1 is tied
;; to the destination.
(define_insn "sse4_1_mulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "%0")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "pmuldq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
3038
;; pmaddwd: multiply-and-add of signed words.  Each V4SI result element
;; is the sum of two sign-extended word products: the even-numbered
;; elements (0,2,4,6) of the two inputs multiplied together, plus the
;; odd-numbered elements (1,3,5,7) multiplied together.  Operands 1 and
;; 2 are commutative ("%"); operand 1 is tied to the destination.
(define_insn "sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          ;; Products of the even-numbered words.
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)]))))
          ;; Products of the odd-numbered words.
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmaddwd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3075
;; V4SI multiply.  With SSE4.1 the MULT pattern itself is emitted (after
;; operand fixup) and matched by *sse4_1_mulv4si3.  Otherwise the
;; product is synthesized from two unsigned widening multiplies, one on
;; elements 0/2 and one on elements 1/3, whose low halves are then
;; merged.
(define_expand "mulv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      rtx even_prod, src1_down, src2_down, odd_prod;
      rtx even_packed, odd_packed, shift32;
      rtx dest = operands[0];
      rtx src1 = operands[1];
      rtx src2 = operands[2];

      even_prod = gen_reg_rtx (V4SImode);
      src1_down = gen_reg_rtx (V4SImode);
      src2_down = gen_reg_rtx (V4SImode);
      odd_prod = gen_reg_rtx (V4SImode);
      even_packed = gen_reg_rtx (V4SImode);
      odd_packed = gen_reg_rtx (V4SImode);
      shift32 = GEN_INT (32);

      /* Products of elements 0 and 2.  */
      emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, even_prod),
                                         src1, src2));

      /* Shift both inputs down by one element so that elements 1 and 3
         land in the element 0 and 2 slots.  On K8, at least, this is
         faster than a shuffle.  */
      emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src1_down),
                                   gen_lowpart (TImode, src1),
                                   shift32));
      emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src2_down),
                                   gen_lowpart (TImode, src2),
                                   shift32));

      /* Products of elements 1 and 3.  */
      emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, odd_prod),
                                         src1_down, src2_down));

      /* Bring the result held in element 2 down to element 1; the
         contents of elements 2 and 3 afterwards do not matter.  */
      emit_insn (gen_sse2_pshufd_1 (even_packed, even_prod,
                                    const0_rtx, const2_rtx,
                                    const0_rtx, const0_rtx));
      emit_insn (gen_sse2_pshufd_1 (odd_packed, odd_prod,
                                    const0_rtx, const2_rtx,
                                    const0_rtx, const0_rtx));

      /* Interleave the even and odd results into the destination.  */
      emit_insn (gen_sse2_punpckldq (dest, even_packed, odd_packed));
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
})
3129
;; SSE4.1 matcher for the V4SI multiply; emits pmulld.  Operands 1 and
;; 2 are commutative ("%"); operand 1 is tied to the destination.
(define_insn "*sse4_1_mulv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (mult:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "%0")
          (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "pmulld\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
3139
;; V2DI multiply, synthesized from 32x32->64 unsigned multiplies.  With
;; each 64-bit element split as hi:lo, the low 64 bits of the product
;; are  lo1*lo2 + ((lo1*hi2) << 32) + ((lo2*hi1) << 32).
(define_expand "mulv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx lo_lo, hi1, hi2, lo1_hi2, lo2_hi1, partial, shift32;
  rtx dest = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  lo_lo = gen_reg_rtx (V2DImode);
  hi1 = gen_reg_rtx (V2DImode);
  hi2 = gen_reg_rtx (V2DImode);
  lo1_hi2 = gen_reg_rtx (V2DImode);
  lo2_hi1 = gen_reg_rtx (V2DImode);
  partial = gen_reg_rtx (V2DImode);
  shift32 = GEN_INT (32);

  /* lo1 * lo2.  */
  emit_insn (gen_sse2_umulv2siv2di3 (lo_lo,
                                     gen_lowpart (V4SImode, src1),
                                     gen_lowpart (V4SImode, src2)));

  /* Shift the inputs RIGHT by 32 bits (logical) so that the high half
     of every element sits in the low half, where the widening multiply
     picks up its inputs.  */
  emit_insn (gen_lshrv2di3 (hi1, src1, shift32));
  emit_insn (gen_lshrv2di3 (hi2, src2, shift32));

  /* Cross products: lo1 * hi2 and lo2 * hi1.  */
  emit_insn (gen_sse2_umulv2siv2di3 (lo1_hi2,
                                     gen_lowpart (V4SImode, src1),
                                     gen_lowpart (V4SImode, hi2)));
  emit_insn (gen_sse2_umulv2siv2di3 (lo2_hi1,
                                     gen_lowpart (V4SImode, src2),
                                     gen_lowpart (V4SImode, hi1)));

  /* Move the cross products up into the high half, in place.  */
  emit_insn (gen_ashlv2di3 (lo1_hi2, lo1_hi2, shift32));
  emit_insn (gen_ashlv2di3 (lo2_hi1, lo2_hi1, shift32));

  /* Sum the three partial products.  */
  emit_insn (gen_addv2di3 (partial, lo_lo, lo1_hi2));
  emit_insn (gen_addv2di3 (dest, partial, lo2_hi1));
  DONE;
})
3183
;; Signed widening multiply, "hi" half: V8HI x V8HI -> V4SI.  The low
;; and (signed) high 16-bit parts of every product are computed
;; separately and then combined with vec_interleave_highv8hi.
(define_expand "vec_widen_smult_hi_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo, prod_hi, result;
  rtx a = operands[1];
  rtx b = operands[2];

  prod_lo = gen_reg_rtx (V8HImode);
  prod_hi = gen_reg_rtx (V8HImode);
  /* The interleave writes V8HI, so view the V4SI result in that mode.  */
  result = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (prod_lo, a, b));
  emit_insn (gen_smulv8hi3_highpart (prod_hi, a, b));
  emit_insn (gen_vec_interleave_highv8hi (result, prod_lo, prod_hi));
  DONE;
})
3203
;; Signed widening multiply, "lo" half: V8HI x V8HI -> V4SI.  The low
;; and (signed) high 16-bit parts of every product are computed
;; separately and then combined with vec_interleave_lowv8hi.
(define_expand "vec_widen_smult_lo_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo, prod_hi, result;
  rtx a = operands[1];
  rtx b = operands[2];

  prod_lo = gen_reg_rtx (V8HImode);
  prod_hi = gen_reg_rtx (V8HImode);
  /* The interleave writes V8HI, so view the V4SI result in that mode.  */
  result = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (prod_lo, a, b));
  emit_insn (gen_smulv8hi3_highpart (prod_hi, a, b));
  emit_insn (gen_vec_interleave_lowv8hi (result, prod_lo, prod_hi));
  DONE;
})
3223
;; Unsigned widening multiply, "hi" half: V8HI x V8HI -> V4SI.  The low
;; and (unsigned) high 16-bit parts of every product are computed
;; separately and then combined with vec_interleave_highv8hi.
(define_expand "vec_widen_umult_hi_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo, prod_hi, result;
  rtx a = operands[1];
  rtx b = operands[2];

  prod_lo = gen_reg_rtx (V8HImode);
  prod_hi = gen_reg_rtx (V8HImode);
  /* The interleave writes V8HI, so view the V4SI result in that mode.  */
  result = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (prod_lo, a, b));
  emit_insn (gen_umulv8hi3_highpart (prod_hi, a, b));
  emit_insn (gen_vec_interleave_highv8hi (result, prod_lo, prod_hi));
  DONE;
})
3243
;; Unsigned widening multiply, "lo" half: V8HI x V8HI -> V4SI.  The low
;; and (unsigned) high 16-bit parts of every product are computed
;; separately and then combined with vec_interleave_lowv8hi.
(define_expand "vec_widen_umult_lo_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo, prod_hi, result;
  rtx a = operands[1];
  rtx b = operands[2];

  prod_lo = gen_reg_rtx (V8HImode);
  prod_hi = gen_reg_rtx (V8HImode);
  /* The interleave writes V8HI, so view the V4SI result in that mode.  */
  result = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (prod_lo, a, b));
  emit_insn (gen_umulv8hi3_highpart (prod_hi, a, b));
  emit_insn (gen_vec_interleave_lowv8hi (result, prod_lo, prod_hi));
  DONE;
})
3263
;; Signed widening multiply, "hi" half: V4SI x V4SI -> V2DI.
;;
;; Each input is interleaved with itself (vec_interleave_highv4si) so
;; that the wanted source elements end up in the element 0 and 2 slots
;; read by the widening multiply.
;;
;; The multiply must be the SIGNED one (sse4_1_mulv2siv2di3, pmuldq).
;; The unsigned sse2_umulv2siv2di3 zero-extends its inputs, so for any
;; negative element it yields a different 64-bit product (for example
;; -1 * 1 would come out as 0x00000000ffffffff instead of -1).  pmuldq
;; requires SSE4.1, hence the expander is only enabled for that target;
;; without it this optab is simply not available.
(define_expand "vec_widen_smult_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE4_1"
{
  rtx op1, op2, t1, t2;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);

  /* Duplicate the selected source elements into slots 0 and 2.  */
  emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
  emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));

  /* Sign-extending 32x32->64 multiply of slots 0 and 2.  */
  emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
  DONE;
})
3282
;; Signed widening multiply, "lo" half: V4SI x V4SI -> V2DI.
;;
;; Each input is interleaved with itself (vec_interleave_lowv4si) so
;; that the wanted source elements end up in the element 0 and 2 slots
;; read by the widening multiply.
;;
;; The multiply must be the SIGNED one (sse4_1_mulv2siv2di3, pmuldq).
;; The unsigned sse2_umulv2siv2di3 zero-extends its inputs, so for any
;; negative element it yields a different 64-bit product (for example
;; -1 * 1 would come out as 0x00000000ffffffff instead of -1).  pmuldq
;; requires SSE4.1, hence the expander is only enabled for that target;
;; without it this optab is simply not available.
(define_expand "vec_widen_smult_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE4_1"
{
  rtx op1, op2, t1, t2;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);

  /* Duplicate the selected source elements into slots 0 and 2.  */
  emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
  emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));

  /* Sign-extending 32x32->64 multiply of slots 0 and 2.  */
  emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
  DONE;
})
3301
;; Unsigned widening multiply, "hi" half: V4SI x V4SI -> V2DI.  Each
;; input is interleaved with itself (vec_interleave_highv4si) so that
;; the wanted elements occupy slots 0 and 2, which is where the
;; unsigned widening multiply sse2_umulv2siv2di3 takes its inputs.
(define_expand "vec_widen_umult_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx dup_a, dup_b;
  rtx a = operands[1];
  rtx b = operands[2];

  dup_a = gen_reg_rtx (V4SImode);
  dup_b = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_interleave_highv4si (dup_a, a, a));
  emit_insn (gen_vec_interleave_highv4si (dup_b, b, b));
  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], dup_a, dup_b));
  DONE;
})
3320
;; Unsigned widening multiply, "lo" half: V4SI x V4SI -> V2DI.  Each
;; input is interleaved with itself (vec_interleave_lowv4si) so that
;; the wanted elements occupy slots 0 and 2, which is where the
;; unsigned widening multiply sse2_umulv2siv2di3 takes its inputs.
(define_expand "vec_widen_umult_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx dup_a, dup_b;
  rtx a = operands[1];
  rtx b = operands[2];

  dup_a = gen_reg_rtx (V4SImode);
  dup_b = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_interleave_lowv4si (dup_a, a, a));
  emit_insn (gen_vec_interleave_lowv4si (dup_b, b, b));
  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], dup_a, dup_b));
  DONE;
})
3339
;; Signed dot-product step: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2), i.e. the pairwise word products are summed into V4SI
;; and added to the V4SI accumulator.
(define_expand "sdot_prodv8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")
   (match_operand:V4SI 3 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx acc = operands[3];
  rtx pair_sums = gen_reg_rtx (V4SImode);

  emit_insn (gen_sse2_pmaddwd (pair_sums, operands[1], operands[2]));
  emit_insn (gen_addv4si3 (operands[0], acc, pair_sums));
  DONE;
})
3352
;; Unsigned dot-product step: V4SI x V4SI accumulated into V2DI.
;; Elements 0/2 are multiplied directly; elements 1/3 are first shifted
;; down by 32 bits into the 0/2 slots and multiplied the same way.
;; Both V2DI products are added to the accumulator (operand 3).
(define_expand "udot_prodv4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")
   (match_operand:V2DI 3 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx even_sum, a_down, b_down, odd_prod;
  rtx a = operands[1];
  rtx b = operands[2];

  /* Even elements: multiply, then fold in the accumulator.  */
  even_sum = gen_reg_rtx (V2DImode);
  emit_insn (gen_sse2_umulv2siv2di3 (even_sum, a, b));
  emit_insn (gen_addv2di3 (even_sum, even_sum, operands[3]));

  /* Shift the whole 128-bit value right by 32 bits so that elements 1
     and 3 land in the element 0 and 2 positions.  */
  a_down = gen_reg_rtx (V4SImode);
  b_down = gen_reg_rtx (V4SImode);
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, a_down),
                               gen_lowpart (TImode, a),
                               GEN_INT (32)));
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, b_down),
                               gen_lowpart (TImode, b),
                               GEN_INT (32)));

  /* Odd elements.  */
  odd_prod = gen_reg_rtx (V2DImode);
  emit_insn (gen_sse2_umulv2siv2di3 (odd_prod, a_down, b_down));

  emit_insn (gen_addv2di3 (operands[0], even_sum, odd_prod));
  DONE;
})
3381
;; Arithmetic right shift of word and doubleword vectors; emits
;; psra{w,d}.  The count (operand 2) is either an SSE register or an
;; integer constant ("xn"); operand 1 is tied to the destination.
(define_insn "ashr<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
        (ashiftrt:SSEMODE24
          (match_operand:SSEMODE24 1 "register_operand" "0")
          (match_operand:TI 2 "nonmemory_operand" "xn")))]
  "TARGET_SSE2"
  "psra<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3392
;; Logical right shift of word, doubleword and quadword vectors; emits
;; psrl{w,d,q}.  The count (operand 2) is either an SSE register or an
;; integer constant ("xn"); operand 1 is tied to the destination.
(define_insn "lshr<mode>3"
  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
        (lshiftrt:SSEMODE248
          (match_operand:SSEMODE248 1 "register_operand" "0")
          (match_operand:TI 2 "nonmemory_operand" "xn")))]
  "TARGET_SSE2"
  "psrl<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3403
;; Left shift of word, doubleword and quadword vectors; emits
;; psll{w,d,q}.  The count (operand 2) is either an SSE register or an
;; integer constant ("xn"); operand 1 is tied to the destination.
(define_insn "ashl<mode>3"
  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
        (ashift:SSEMODE248
          (match_operand:SSEMODE248 1 "register_operand" "0")
          (match_operand:TI 2 "nonmemory_operand" "xn")))]
  "TARGET_SSE2"
  "psll<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3414
;; Whole-vector left shift.  The shift is carried out on the vector
;; viewed as a single TImode value, so operands 0 and 1 are replaced by
;; their TImode lowparts before the ashift:TI pattern is emitted.  The
;; expansion FAILs unless the count satisfies
;; const_0_to_255_mul_8_operand.
(define_expand "vec_shl_<mode>"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (ashift:TI
          (match_operand:SSEMODEI 1 "register_operand" "")
          (match_operand:SI 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (const_0_to_255_mul_8_operand (operands[2], SImode))
    {
      operands[0] = gen_lowpart (TImode, operands[0]);
      operands[1] = gen_lowpart (TImode, operands[1]);
    }
  else
    FAIL;
})
3426
;; Whole-vector logical right shift.  The shift is carried out on the
;; vector viewed as a single TImode value, so operands 0 and 1 are
;; replaced by their TImode lowparts before the lshiftrt:TI pattern is
;; emitted.  The expansion FAILs unless the count satisfies
;; const_0_to_255_mul_8_operand.
(define_expand "vec_shr_<mode>"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (lshiftrt:TI
          (match_operand:SSEMODEI 1 "register_operand" "")
          (match_operand:SI 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (const_0_to_255_mul_8_operand (operands[2], SImode))
    {
      operands[0] = gen_lowpart (TImode, operands[0]);
      operands[1] = gen_lowpart (TImode, operands[1]);
    }
  else
    FAIL;
})
3438
;; Unsigned maximum of byte vectors.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the UMAX pattern is
;; emitted.
(define_expand "umaxv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (umax:V16QI
          (match_operand:V16QI 1 "nonimmediate_operand" "")
          (match_operand:V16QI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);
})
3445
;; Matcher for the unsigned byte maximum; emits pmaxub.  Operands 1
;; and 2 are commutative ("%"); operand 1 is tied to the destination.
(define_insn "*umaxv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (umax:V16QI
          (match_operand:V16QI 1 "nonimmediate_operand" "%0")
          (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
  "pmaxub\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3455
;; Signed maximum of word vectors.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the SMAX pattern is
;; emitted.
(define_expand "smaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (smax:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "")
          (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);
})
3462
;; Matcher for the signed word maximum; emits pmaxsw.  Operands 1 and
;; 2 are commutative ("%"); operand 1 is tied to the destination.
(define_insn "*smaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (smax:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "%0")
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
  "pmaxsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3472
;; Unsigned maximum of word vectors.  With SSE4.1 the UMAX pattern is
;; emitted (after operand fixup) for *sse4_1_umax<mode>3 to match.
;; Otherwise it is computed as  (a -us b) + b : the unsigned saturating
;; difference is a - b where a > b and 0 elsewhere, so adding b back
;; gives the larger of the two.
(define_expand "umaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (umax:V8HI
          (match_operand:V8HI 1 "register_operand" "")
          (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      rtx dest = operands[0];
      rtx rhs = operands[2];
      rtx diff;

      /* Operand 2 is read again by the add, so the intermediate
         difference must not overwrite it when it is also the
         destination.  */
      if (rtx_equal_p (dest, rhs))
        diff = gen_reg_rtx (V8HImode);
      else
        diff = dest;

      emit_insn (gen_sse2_ussubv8hi3 (diff, operands[1], rhs));
      emit_insn (gen_addv8hi3 (dest, diff, rhs));
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
})
3491
;; Signed maximum of byte and doubleword vectors.  With SSE4.1 the SMAX
;; pattern is emitted (after operand fixup) for *sse4_1_smax<mode>3 to
;; match.  Otherwise it is expanded as a vector conditional:
;;   dest = (op1 > op2) ? op1 : op2
(define_expand "smax<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "")
        (smax:SSEMODE14
          (match_operand:SSEMODE14 1 "register_operand" "")
          (match_operand:SSEMODE14 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      /* Operand vector in vcond layout: destination, value if true,
         value if false, comparison, and the two compared values.  */
      rtx sel[6];
      bool expanded;

      sel[0] = operands[0];
      sel[1] = operands[1];
      sel[2] = operands[2];
      sel[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
      sel[4] = operands[1];
      sel[5] = operands[2];

      expanded = ix86_expand_int_vcond (sel);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
})
3516
;; SSE4.1 matcher for the signed maximum of byte and doubleword
;; vectors; emits pmaxs{b,d}.  Operands 1 and 2 are commutative ("%");
;; operand 1 is tied to the destination.
(define_insn "*sse4_1_smax<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
        (smax:SSEMODE14
          (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
  "pmaxs<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
3527
;; Unsigned maximum of doubleword vectors.  With SSE4.1 the UMAX
;; pattern is emitted (after operand fixup) for *sse4_1_umax<mode>3 to
;; match.  Otherwise it is expanded as a vector conditional:
;;   dest = (op1 >u op2) ? op1 : op2
(define_expand "umaxv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (umax:V4SI
          (match_operand:V4SI 1 "register_operand" "")
          (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      /* Operand vector in vcond layout: destination, value if true,
         value if false, comparison, and the two compared values.  */
      rtx sel[6];
      bool expanded;

      sel[0] = operands[0];
      sel[1] = operands[1];
      sel[2] = operands[2];
      sel[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
      sel[4] = operands[1];
      sel[5] = operands[2];

      expanded = ix86_expand_int_vcond (sel);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
})
3552
;; SSE4.1 matcher for the unsigned maximum of word and doubleword
;; vectors; emits pmaxu{w,d}.  Operands 1 and 2 are commutative ("%");
;; operand 1 is tied to the destination.
(define_insn "*sse4_1_umax<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
        (umax:SSEMODE24
          (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (UMAX, <MODE>mode, operands)"
  "pmaxu<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
3563
;; Unsigned minimum of byte vectors.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the UMIN pattern is
;; emitted.
(define_expand "uminv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (umin:V16QI
          (match_operand:V16QI 1 "nonimmediate_operand" "")
          (match_operand:V16QI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);
})
3570
;; Matcher for the unsigned byte minimum; emits pminub.  Operands 1
;; and 2 are commutative ("%"); operand 1 is tied to the destination.
(define_insn "*uminv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (umin:V16QI
          (match_operand:V16QI 1 "nonimmediate_operand" "%0")
          (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
  "pminub\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3580
;; Signed minimum of word vectors.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the SMIN pattern is
;; emitted.
(define_expand "sminv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (smin:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "")
          (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);
})
3587
;; Matcher for the signed word minimum; emits pminsw.  Operands 1 and
;; 2 are commutative ("%"); operand 1 is tied to the destination.
(define_insn "*sminv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (smin:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "%0")
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
  "pminsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3597
;; Signed minimum of byte and doubleword vectors.  With SSE4.1 the SMIN
;; pattern is emitted (after operand fixup) for *sse4_1_smin<mode>3 to
;; match.  Otherwise it is expanded as a vector conditional with the
;; two result arms swapped relative to the maximum:
;;   dest = (op1 > op2) ? op2 : op1
(define_expand "smin<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "")
        (smin:SSEMODE14
          (match_operand:SSEMODE14 1 "register_operand" "")
          (match_operand:SSEMODE14 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      /* Operand vector in vcond layout: destination, value if true,
         value if false, comparison, and the two compared values.  */
      rtx sel[6];
      bool expanded;

      sel[0] = operands[0];
      sel[1] = operands[2];
      sel[2] = operands[1];
      sel[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
      sel[4] = operands[1];
      sel[5] = operands[2];

      expanded = ix86_expand_int_vcond (sel);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
})
3622
;; SSE4.1 matcher for the signed minimum of byte and doubleword
;; vectors; emits pmins{b,d}.  Operands 1 and 2 are commutative ("%");
;; operand 1 is tied to the destination.
(define_insn "*sse4_1_smin<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
        (smin:SSEMODE14
          (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
  "pmins<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
3633
;; Unsigned minimum of word and doubleword vectors.  With SSE4.1 the
;; UMIN pattern is emitted (after operand fixup) for
;; *sse4_1_umin<mode>3 to match.  Otherwise it is expanded as a vector
;; conditional with the two result arms swapped relative to the maximum:
;;   dest = (op1 >u op2) ? op2 : op1
(define_expand "umin<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "")
        (umin:SSEMODE24
          (match_operand:SSEMODE24 1 "register_operand" "")
          (match_operand:SSEMODE24 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      /* Operand vector in vcond layout: destination, value if true,
         value if false, comparison, and the two compared values.  */
      rtx sel[6];
      bool expanded;

      sel[0] = operands[0];
      sel[1] = operands[2];
      sel[2] = operands[1];
      sel[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
      sel[4] = operands[1];
      sel[5] = operands[2];

      expanded = ix86_expand_int_vcond (sel);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
})
3658
;; SSE4.1 matcher for the unsigned minimum of word and doubleword
;; vectors; emits pminu{w,d}.  Operands 1 and 2 are commutative ("%");
;; operand 1 is tied to the destination.
(define_insn "*sse4_1_umin<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
        (umin:SSEMODE24
          (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (UMIN, <MODE>mode, operands)"
  "pminu<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
3669
3670 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3671 ;;
3672 ;; Parallel integral comparisons
3673 ;;
3674 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3675
;; Per-element equality comparison of byte, word and doubleword
;; vectors; emits pcmpeq{b,w,d}.  Operands 1 and 2 are commutative
;; ("%"); operand 1 is tied to the destination.
(define_insn "sse2_eq<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (eq:SSEMODE124
          (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
  "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
3686
;; SSE4.1 per-element equality comparison of quadword vectors; emits
;; pcmpeqq.  Operands 1 and 2 are commutative ("%"); operand 1 is tied
;; to the destination.
(define_insn "sse4_1_eqv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (eq:V2DI
          (match_operand:V2DI 1 "nonimmediate_operand" "%0")
          (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
  "pcmpeqq\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
3697
;; Element-wise greater-than comparison (RTL gt) for the SSEMODE124
;; modes (V16QI, V8HI, V4SI).  Emits pcmpgt<ssevecsize>.  The operation
;; is not commutative, so operand 1 has no "%" marker: it must be a
;; register and is tied to the destination.
3698 (define_insn "sse2_gt<mode>3"
3699 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
3700 (gt:SSEMODE124
3701 (match_operand:SSEMODE124 1 "register_operand" "0")
3702 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
3703 "TARGET_SSE2"
3704 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3705 [(set_attr "type" "ssecmp")
3706 (set_attr "prefix_data16" "1")
3707 (set_attr "mode" "TI")])
3708
;; V2DI element-wise greater-than comparison (RTL gt), available only
;; with TARGET_SSE4_2.  Emits pcmpgtq.
;; Operand 1 is tied to the destination by the "0" constraint, so it uses
;; register_operand like the narrower sse2_gt<mode>3 pattern; operand 2
;; may be a register or memory.
;; pcmpgtq uses a three-byte (0F 38) opcode, so prefix_extra is set to
;; account for the extra opcode byte, as the other SSE4 patterns in this
;; file do.
3709 (define_insn "sse4_2_gtv2di3"
3710 [(set (match_operand:V2DI 0 "register_operand" "=x")
3711 (gt:V2DI
3712 (match_operand:V2DI 1 "register_operand" "0")
3713 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
3714 "TARGET_SSE4_2"
3715 "pcmpgtq\t{%2, %0|%0, %2}"
3716 [(set_attr "type" "ssecmp")
3717 (set_attr "prefix_extra" "1") (set_attr "mode" "TI")])
3718
;; Integer vector conditional select for the SSEMODEI modes:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; The whole expansion is delegated to ix86_expand_int_vcond; when that
;; helper reports failure the expander FAILs, otherwise it is DONE.
3719 (define_expand "vcond<mode>"
3720 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3721 (if_then_else:SSEMODEI
3722 (match_operator 3 ""
3723 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3724 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3725 (match_operand:SSEMODEI 1 "general_operand" "")
3726 (match_operand:SSEMODEI 2 "general_operand" "")))]
3727 "TARGET_SSE2"
3728 {
3729 bool expanded = ix86_expand_int_vcond (operands);
3730 if (!expanded)
3731 FAIL;
3732 DONE;
3733 })
3734
;; Counterpart of vcond<mode> under the vcondu<mode> name, with the same
;; operand layout for the SSEMODEI modes:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; Expansion is likewise handed to ix86_expand_int_vcond, and the
;; expander FAILs when that helper returns false.
3735 (define_expand "vcondu<mode>"
3736 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3737 (if_then_else:SSEMODEI
3738 (match_operator 3 ""
3739 [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
3740 (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
3741 (match_operand:SSEMODEI 1 "general_operand" "")
3742 (match_operand:SSEMODEI 2 "general_operand" "")))]
3743 "TARGET_SSE2"
3744 {
3745 bool expanded = ix86_expand_int_vcond (operands);
3746 if (!expanded)
3747 FAIL;
3748 DONE;
3749 })
3750
3751 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3752 ;;
3753 ;; Parallel bitwise logical operations
3754 ;;
3755 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3756
;; One's complement for the SSEMODEI modes, expressed as an XOR of
;; operand 1 with an all-ones vector.  The preparation code builds a
;; CONST_VECTOR whose every element is constm1_rtx, forces it into a
;; register and installs it as operand 2 for the (match_dup 2).
3757 (define_expand "one_cmpl<mode>2"
3758 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3759 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3760 (match_dup 2)))]
3761 "TARGET_SSE2"
3762 {
3763 int nunits = GET_MODE_NUNITS (<MODE>mode);
3764 rtvec ones = rtvec_alloc (nunits);
3765 int elt = nunits;
3766
3767 while (elt-- > 0)
3768 RTVEC_ELT (ones, elt) = constm1_rtx;
3769 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, ones));
3770 })
3771
;; Named expander for bitwise AND on the SSEMODEI modes.  Before the
;; template is emitted the operands are passed through
;; ix86_fixup_binary_operands_no_copy.
3772 (define_expand "and<mode>3"
3773 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3774 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3775 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3776 "TARGET_SSE2"
3777 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
3778
;; Bitwise AND on the SSEMODEI modes, emitted as pand regardless of the
;; element size.  Operand 1 is commutative ("%") and tied to the
;; destination; operand 2 may be a register or memory.
3779 (define_insn "*and<mode>3"
3780 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3781 (and:SSEMODEI
3782 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3783 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3784 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3785 "pand\t{%2, %0|%0, %2}"
3786 [(set_attr "type" "sselog")
3787 (set_attr "prefix_data16" "1")
3788 (set_attr "mode" "TI")])
3789
;; AND-NOT on the SSEMODEI modes: computes (~operand 1) & operand 2 and
;; emits pandn.  The complemented operand 1 must be a register tied to
;; the destination; the pattern is not commutative.
3790 (define_insn "sse2_nand<mode>3"
3791 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3792 (and:SSEMODEI
3793 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3794 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3795 "TARGET_SSE2"
3796 "pandn\t{%2, %0|%0, %2}"
3797 [(set_attr "type" "sselog")
3798 (set_attr "prefix_data16" "1")
3799 (set_attr "mode" "TI")])
3800
;; Named expander for bitwise AND on TFmode values, enabled only for
;; TARGET_64BIT.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
3801 (define_expand "andtf3"
3802 [(set (match_operand:TF 0 "register_operand" "")
3803 (and:TF (match_operand:TF 1 "nonimmediate_operand" "")
3804 (match_operand:TF 2 "nonimmediate_operand" "")))]
3805 "TARGET_64BIT"
3806 "ix86_fixup_binary_operands_no_copy (AND, TFmode, operands);")
3807
;; Bitwise AND on TFmode values held in SSE registers, emitted as pand.
;; Enabled for TARGET_64BIT when ix86_binary_operator_ok accepts the
;; operands.  Operand 1 is commutative and tied to the destination.
3808 (define_insn "*andtf3"
3809 [(set (match_operand:TF 0 "register_operand" "=x")
3810 (and:TF
3811 (match_operand:TF 1 "nonimmediate_operand" "%0")
3812 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3813 "TARGET_64BIT && ix86_binary_operator_ok (AND, TFmode, operands)"
3814 "pand\t{%2, %0|%0, %2}"
3815 [(set_attr "type" "sselog")
3816 (set_attr "prefix_data16" "1")
3817 (set_attr "mode" "TI")])
3818
;; AND-NOT on TFmode values: computes (~operand 1) & operand 2 and emits
;; pandn.  Enabled for TARGET_64BIT.  The complemented operand 1 must be
;; a register tied to the destination.
3819 (define_insn "*nandtf3"
3820 [(set (match_operand:TF 0 "register_operand" "=x")
3821 (and:TF
3822 (not:TF (match_operand:TF 1 "register_operand" "0"))
3823 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3824 "TARGET_64BIT"
3825 "pandn\t{%2, %0|%0, %2}"
3826 [(set_attr "type" "sselog")
3827 (set_attr "prefix_data16" "1")
3828 (set_attr "mode" "TI")])
3829
;; Named expander for bitwise inclusive OR on the SSEMODEI modes.
;; Operands are passed through ix86_fixup_binary_operands_no_copy before
;; the template is emitted.
3830 (define_expand "ior<mode>3"
3831 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3832 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3833 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3834 "TARGET_SSE2"
3835 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
3836
;; Bitwise inclusive OR on the SSEMODEI modes, emitted as por regardless
;; of the element size.  Operand 1 is commutative ("%") and tied to the
;; destination; operand 2 may be a register or memory.
3837 (define_insn "*ior<mode>3"
3838 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3839 (ior:SSEMODEI
3840 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3841 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3842 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3843 "por\t{%2, %0|%0, %2}"
3844 [(set_attr "type" "sselog")
3845 (set_attr "prefix_data16" "1")
3846 (set_attr "mode" "TI")])
3847
;; Named expander for bitwise inclusive OR on TFmode values, enabled
;; only for TARGET_64BIT.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
3848 (define_expand "iortf3"
3849 [(set (match_operand:TF 0 "register_operand" "")
3850 (ior:TF (match_operand:TF 1 "nonimmediate_operand" "")
3851 (match_operand:TF 2 "nonimmediate_operand" "")))]
3852 "TARGET_64BIT"
3853 "ix86_fixup_binary_operands_no_copy (IOR, TFmode, operands);")
3854
;; Bitwise inclusive OR on TFmode values held in SSE registers, emitted
;; as por.  Enabled for TARGET_64BIT when ix86_binary_operator_ok accepts
;; the operands.  Operand 1 is commutative and tied to the destination.
3855 (define_insn "*iortf3"
3856 [(set (match_operand:TF 0 "register_operand" "=x")
3857 (ior:TF
3858 (match_operand:TF 1 "nonimmediate_operand" "%0")
3859 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3860 "TARGET_64BIT && ix86_binary_operator_ok (IOR, TFmode, operands)"
3861 "por\t{%2, %0|%0, %2}"
3862 [(set_attr "type" "sselog")
3863 (set_attr "prefix_data16" "1")
3864 (set_attr "mode" "TI")])
3865
;; Named expander for bitwise exclusive OR on the SSEMODEI modes.
;; Operands are passed through ix86_fixup_binary_operands_no_copy before
;; the template is emitted.
3866 (define_expand "xor<mode>3"
3867 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3868 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3869 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3870 "TARGET_SSE2"
3871 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
3872
;; Bitwise exclusive OR on the SSEMODEI modes, emitted as pxor
;; regardless of the element size.  Operand 1 is commutative ("%") and
;; tied to the destination; operand 2 may be a register or memory.
3873 (define_insn "*xor<mode>3"
3874 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3875 (xor:SSEMODEI
3876 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3877 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3878 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3879 "pxor\t{%2, %0|%0, %2}"
3880 [(set_attr "type" "sselog")
3881 (set_attr "prefix_data16" "1")
3882 (set_attr "mode" "TI")])
3883
;; Named expander for bitwise exclusive OR on TFmode values, enabled
;; only for TARGET_64BIT.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
3884 (define_expand "xortf3"
3885 [(set (match_operand:TF 0 "register_operand" "")
3886 (xor:TF (match_operand:TF 1 "nonimmediate_operand" "")
3887 (match_operand:TF 2 "nonimmediate_operand" "")))]
3888 "TARGET_64BIT"
3889 "ix86_fixup_binary_operands_no_copy (XOR, TFmode, operands);")
3890
;; Bitwise exclusive OR on TFmode values held in SSE registers, emitted
;; as pxor.  Enabled for TARGET_64BIT when ix86_binary_operator_ok
;; accepts the operands.  Operand 1 is commutative and tied to the
;; destination.
3891 (define_insn "*xortf3"
3892 [(set (match_operand:TF 0 "register_operand" "=x")
3893 (xor:TF
3894 (match_operand:TF 1 "nonimmediate_operand" "%0")
3895 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
3896 "TARGET_64BIT && ix86_binary_operator_ok (XOR, TFmode, operands)"
3897 "pxor\t{%2, %0|%0, %2}"
3898 [(set_attr "type" "sselog")
3899 (set_attr "prefix_data16" "1")
3900 (set_attr "mode" "TI")])
3901
3902 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3903 ;;
3904 ;; Parallel integral element swizzling
3905 ;;
3906 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3907
;; Pack two V8HI inputs (operands 1 and 2) into one V16QI result
;; (operand 0).  No pack instruction is used: both inputs are viewed as
;; V16QI via gen_lowpart and run through three rounds of high/low byte
;; interleaves, followed by a final low interleave into operand 0.
;; Six fresh V16QI pseudos hold the intermediate values.  The diagram
;; below traces the byte positions through each step.
3908 ;; Reduce:
3909 ;; op1 = abcdefghijklmnop
3910 ;; op2 = qrstuvwxyz012345
3911 ;; h1 = aqbrcsdteufvgwhx
3912 ;; l1 = iyjzk0l1m2n3o4p5
3913 ;; h2 = aiqybjrzcks0dlt1
3914 ;; l2 = emu2fnv3gow4hpx5
3915 ;; h3 = aeimquy2bfjnrvz3
3916 ;; l3 = cgkosw04dhlptx15
3917 ;; result = bdfhjlnprtvxz135
3918 (define_expand "vec_pack_trunc_v8hi"
3919 [(match_operand:V16QI 0 "register_operand" "")
3920 (match_operand:V8HI 1 "register_operand" "")
3921 (match_operand:V8HI 2 "register_operand" "")]
3922 "TARGET_SSE2"
3923 {
3924 rtx op1, op2, h1, l1, h2, l2, h3, l3;
3925
3926 op1 = gen_lowpart (V16QImode, operands[1]);
3927 op2 = gen_lowpart (V16QImode, operands[2]);
3928 h1 = gen_reg_rtx (V16QImode);
3929 l1 = gen_reg_rtx (V16QImode);
3930 h2 = gen_reg_rtx (V16QImode);
3931 l2 = gen_reg_rtx (V16QImode);
3932 h3 = gen_reg_rtx (V16QImode);
3933 l3 = gen_reg_rtx (V16QImode);
3934
3935 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
3936 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
3937 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
3938 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
3939 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
3940 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
3941 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
3942 DONE;
3943 })
3944
;; Pack two V4SI inputs (operands 1 and 2) into one V8HI result
;; (operand 0).  Both inputs are viewed as V8HI via gen_lowpart and run
;; through two rounds of high/low halfword interleaves, followed by a
;; final low interleave into operand 0.  Four fresh V8HI pseudos hold
;; the intermediate values.  The diagram below traces the element
;; positions through each step.
3945 ;; Reduce:
3946 ;; op1 = abcdefgh
3947 ;; op2 = ijklmnop
3948 ;; h1 = aibjckdl
3949 ;; l1 = emfngohp
3950 ;; h2 = aeimbfjn
3951 ;; l2 = cgkodhlp
3952 ;; result = bdfhjlnp
3953 (define_expand "vec_pack_trunc_v4si"
3954 [(match_operand:V8HI 0 "register_operand" "")
3955 (match_operand:V4SI 1 "register_operand" "")
3956 (match_operand:V4SI 2 "register_operand" "")]
3957 "TARGET_SSE2"
3958 {
3959 rtx op1, op2, h1, l1, h2, l2;
3960
3961 op1 = gen_lowpart (V8HImode, operands[1]);
3962 op2 = gen_lowpart (V8HImode, operands[2]);
3963 h1 = gen_reg_rtx (V8HImode);
3964 l1 = gen_reg_rtx (V8HImode);
3965 h2 = gen_reg_rtx (V8HImode);
3966 l2 = gen_reg_rtx (V8HImode);
3967
3968 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
3969 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
3970 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
3971 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
3972 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
3973 DONE;
3974 })
3975
;; Pack two V2DI inputs (operands 1 and 2) into one V4SI result
;; (operand 0).  Both inputs are viewed as V4SI via gen_lowpart; one
;; high and one low doubleword interleave are followed by a final low
;; interleave into operand 0.  Two fresh V4SI pseudos hold the
;; intermediate values.  The diagram below traces the element positions.
3976 ;; Reduce:
3977 ;; op1 = abcd
3978 ;; op2 = efgh
3979 ;; h1 = aebf
3980 ;; l1 = cgdh
3981 ;; result = bdfh
3982 (define_expand "vec_pack_trunc_v2di"
3983 [(match_operand:V4SI 0 "register_operand" "")
3984 (match_operand:V2DI 1 "register_operand" "")
3985 (match_operand:V2DI 2 "register_operand" "")]
3986 "TARGET_SSE2"
3987 {
3988 rtx op1, op2, h1, l1;
3989
3990 op1 = gen_lowpart (V4SImode, operands[1]);
3991 op2 = gen_lowpart (V4SImode, operands[2]);
3992 h1 = gen_reg_rtx (V4SImode);
3993 l1 = gen_reg_rtx (V4SImode);
3994
3995 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
3996 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
3997 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
3998 DONE;
3999 })
4000
;; Named expander: interleave the high halves of two V16QI vectors.
;; The template selects elements 8..15 of operand 1 alternating with
;; elements 8..15 of operand 2 (indices 24..31 of the concatenation).
;; The preparation code simply emits sse2_punpckhbw with the same three
;; operands and finishes with DONE.
4001 (define_expand "vec_interleave_highv16qi"
4002 [(set (match_operand:V16QI 0 "register_operand" "=x")
4003 (vec_select:V16QI
4004 (vec_concat:V32QI
4005 (match_operand:V16QI 1 "register_operand" "0")
4006 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4007 (parallel [(const_int 8) (const_int 24)
4008 (const_int 9) (const_int 25)
4009 (const_int 10) (const_int 26)
4010 (const_int 11) (const_int 27)
4011 (const_int 12) (const_int 28)
4012 (const_int 13) (const_int 29)
4013 (const_int 14) (const_int 30)
4014 (const_int 15) (const_int 31)])))]
4015 "TARGET_SSE2"
4016 {
4017 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
4018 DONE;
4019 })
4020
;; Named expander: interleave the low halves of two V16QI vectors.
;; The template selects elements 0..7 of operand 1 alternating with
;; elements 0..7 of operand 2 (indices 16..23 of the concatenation).
;; The preparation code simply emits sse2_punpcklbw with the same three
;; operands and finishes with DONE.
4021 (define_expand "vec_interleave_lowv16qi"
4022 [(set (match_operand:V16QI 0 "register_operand" "=x")
4023 (vec_select:V16QI
4024 (vec_concat:V32QI
4025 (match_operand:V16QI 1 "register_operand" "0")
4026 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4027 (parallel [(const_int 0) (const_int 16)
4028 (const_int 1) (const_int 17)
4029 (const_int 2) (const_int 18)
4030 (const_int 3) (const_int 19)
4031 (const_int 4) (const_int 20)
4032 (const_int 5) (const_int 21)
4033 (const_int 6) (const_int 22)
4034 (const_int 7) (const_int 23)])))]
4035 "TARGET_SSE2"
4036 {
4037 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
4038 DONE;
4039 })
4040
;; Named expander: interleave the high halves of two V8HI vectors.
;; The template selects elements 4..7 of operand 1 alternating with
;; elements 4..7 of operand 2 (indices 12..15 of the concatenation).
;; The preparation code emits sse2_punpckhwd and finishes with DONE.
4041 (define_expand "vec_interleave_highv8hi"
4042 [(set (match_operand:V8HI 0 "register_operand" "=x")
4043 (vec_select:V8HI
4044 (vec_concat:V16HI
4045 (match_operand:V8HI 1 "register_operand" "0")
4046 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4047 (parallel [(const_int 4) (const_int 12)
4048 (const_int 5) (const_int 13)
4049 (const_int 6) (const_int 14)
4050 (const_int 7) (const_int 15)])))]
4051 "TARGET_SSE2"
4052 {
4053 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
4054 DONE;
4055 })
4056
;; Named expander: interleave the low halves of two V8HI vectors.
;; The template selects elements 0..3 of operand 1 alternating with
;; elements 0..3 of operand 2 (indices 8..11 of the concatenation).
;; The preparation code emits sse2_punpcklwd and finishes with DONE.
4057 (define_expand "vec_interleave_lowv8hi"
4058 [(set (match_operand:V8HI 0 "register_operand" "=x")
4059 (vec_select:V8HI
4060 (vec_concat:V16HI
4061 (match_operand:V8HI 1 "register_operand" "0")
4062 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4063 (parallel [(const_int 0) (const_int 8)
4064 (const_int 1) (const_int 9)
4065 (const_int 2) (const_int 10)
4066 (const_int 3) (const_int 11)])))]
4067 "TARGET_SSE2"
4068 {
4069 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
4070 DONE;
4071 })
4072
;; Named expander: interleave the high halves of two V4SI vectors.
;; The template selects elements 2 and 3 of operand 1 alternating with
;; elements 2 and 3 of operand 2 (indices 6 and 7 of the concatenation).
;; The preparation code emits sse2_punpckhdq and finishes with DONE.
4073 (define_expand "vec_interleave_highv4si"
4074 [(set (match_operand:V4SI 0 "register_operand" "=x")
4075 (vec_select:V4SI
4076 (vec_concat:V8SI
4077 (match_operand:V4SI 1 "register_operand" "0")
4078 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4079 (parallel [(const_int 2) (const_int 6)
4080 (const_int 3) (const_int 7)])))]
4081 "TARGET_SSE2"
4082 {
4083 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
4084 DONE;
4085 })
4086
;; Named expander: interleave the low halves of two V4SI vectors.
;; The template selects elements 0 and 1 of operand 1 alternating with
;; elements 0 and 1 of operand 2 (indices 4 and 5 of the concatenation).
;; The preparation code emits sse2_punpckldq and finishes with DONE.
4087 (define_expand "vec_interleave_lowv4si"
4088 [(set (match_operand:V4SI 0 "register_operand" "=x")
4089 (vec_select:V4SI
4090 (vec_concat:V8SI
4091 (match_operand:V4SI 1 "register_operand" "0")
4092 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4093 (parallel [(const_int 0) (const_int 4)
4094 (const_int 1) (const_int 5)])))]
4095 "TARGET_SSE2"
4096 {
4097 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
4098 DONE;
4099 })
4100
;; Named expander: combine the high elements of two V2DI vectors.
;; The template selects element 1 of operand 1 followed by element 1 of
;; operand 2 (index 3 of the concatenation).
;; The preparation code emits sse2_punpckhqdq and finishes with DONE.
4101 (define_expand "vec_interleave_highv2di"
4102 [(set (match_operand:V2DI 0 "register_operand" "=x")
4103 (vec_select:V2DI
4104 (vec_concat:V4DI
4105 (match_operand:V2DI 1 "register_operand" "0")
4106 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4107 (parallel [(const_int 1)
4108 (const_int 3)])))]
4109 "TARGET_SSE2"
4110 {
4111 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
4112 DONE;
4113 })
4114
;; Named expander: combine the low elements of two V2DI vectors.
;; The template selects element 0 of operand 1 followed by element 0 of
;; operand 2 (index 2 of the concatenation).
;; The preparation code emits sse2_punpcklqdq and finishes with DONE.
4115 (define_expand "vec_interleave_lowv2di"
4116 [(set (match_operand:V2DI 0 "register_operand" "=x")
4117 (vec_select:V2DI
4118 (vec_concat:V4DI
4119 (match_operand:V2DI 1 "register_operand" "0")
4120 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4121 (parallel [(const_int 0)
4122 (const_int 2)])))]
4123 "TARGET_SSE2"
4124 {
4125 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
4126 DONE;
4127 })
4128
;; packsswb: signed-saturating truncation (ss_truncate) of two V8HI
;; inputs to V8QI each, concatenated into one V16QI result with
;; operand 1's elements first.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
4129 (define_insn "sse2_packsswb"
4130 [(set (match_operand:V16QI 0 "register_operand" "=x")
4131 (vec_concat:V16QI
4132 (ss_truncate:V8QI
4133 (match_operand:V8HI 1 "register_operand" "0"))
4134 (ss_truncate:V8QI
4135 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4136 "TARGET_SSE2"
4137 "packsswb\t{%2, %0|%0, %2}"
4138 [(set_attr "type" "sselog")
4139 (set_attr "prefix_data16" "1")
4140 (set_attr "mode" "TI")])
4141
;; packssdw: signed-saturating truncation (ss_truncate) of two V4SI
;; inputs to V4HI each, concatenated into one V8HI result with
;; operand 1's elements first.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
4142 (define_insn "sse2_packssdw"
4143 [(set (match_operand:V8HI 0 "register_operand" "=x")
4144 (vec_concat:V8HI
4145 (ss_truncate:V4HI
4146 (match_operand:V4SI 1 "register_operand" "0"))
4147 (ss_truncate:V4HI
4148 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
4149 "TARGET_SSE2"
4150 "packssdw\t{%2, %0|%0, %2}"
4151 [(set_attr "type" "sselog")
4152 (set_attr "prefix_data16" "1")
4153 (set_attr "mode" "TI")])
4154
;; packuswb: unsigned-saturating truncation (us_truncate) of two V8HI
;; inputs to V8QI each, concatenated into one V16QI result with
;; operand 1's elements first.  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
4155 (define_insn "sse2_packuswb"
4156 [(set (match_operand:V16QI 0 "register_operand" "=x")
4157 (vec_concat:V16QI
4158 (us_truncate:V8QI
4159 (match_operand:V8HI 1 "register_operand" "0"))
4160 (us_truncate:V8QI
4161 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
4162 "TARGET_SSE2"
4163 "packuswb\t{%2, %0|%0, %2}"
4164 [(set_attr "type" "sselog")
4165 (set_attr "prefix_data16" "1")
4166 (set_attr "mode" "TI")])
4167
;; punpckhbw: interleave the high eight bytes of two V16QI vectors.
;; Result elements alternate operand 1 element i with operand 2
;; element i for i = 8..15 (operand 2 occupies indices 16..31 of the
;; V32QI concatenation).  Operand 1 is tied to the destination.
4168 (define_insn "sse2_punpckhbw"
4169 [(set (match_operand:V16QI 0 "register_operand" "=x")
4170 (vec_select:V16QI
4171 (vec_concat:V32QI
4172 (match_operand:V16QI 1 "register_operand" "0")
4173 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4174 (parallel [(const_int 8) (const_int 24)
4175 (const_int 9) (const_int 25)
4176 (const_int 10) (const_int 26)
4177 (const_int 11) (const_int 27)
4178 (const_int 12) (const_int 28)
4179 (const_int 13) (const_int 29)
4180 (const_int 14) (const_int 30)
4181 (const_int 15) (const_int 31)])))]
4182 "TARGET_SSE2"
4183 "punpckhbw\t{%2, %0|%0, %2}"
4184 [(set_attr "type" "sselog")
4185 (set_attr "prefix_data16" "1")
4186 (set_attr "mode" "TI")])
4187
;; punpcklbw: interleave the low eight bytes of two V16QI vectors.
;; Result elements alternate operand 1 element i with operand 2
;; element i for i = 0..7 (operand 2 occupies indices 16..31 of the
;; V32QI concatenation).  Operand 1 is tied to the destination.
4188 (define_insn "sse2_punpcklbw"
4189 [(set (match_operand:V16QI 0 "register_operand" "=x")
4190 (vec_select:V16QI
4191 (vec_concat:V32QI
4192 (match_operand:V16QI 1 "register_operand" "0")
4193 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
4194 (parallel [(const_int 0) (const_int 16)
4195 (const_int 1) (const_int 17)
4196 (const_int 2) (const_int 18)
4197 (const_int 3) (const_int 19)
4198 (const_int 4) (const_int 20)
4199 (const_int 5) (const_int 21)
4200 (const_int 6) (const_int 22)
4201 (const_int 7) (const_int 23)])))]
4202 "TARGET_SSE2"
4203 "punpcklbw\t{%2, %0|%0, %2}"
4204 [(set_attr "type" "sselog")
4205 (set_attr "prefix_data16" "1")
4206 (set_attr "mode" "TI")])
4207
;; punpckhwd: interleave the high four halfwords of two V8HI vectors.
;; Result elements alternate operand 1 element i with operand 2
;; element i for i = 4..7 (operand 2 occupies indices 8..15 of the
;; V16HI concatenation).  Operand 1 is tied to the destination.
4208 (define_insn "sse2_punpckhwd"
4209 [(set (match_operand:V8HI 0 "register_operand" "=x")
4210 (vec_select:V8HI
4211 (vec_concat:V16HI
4212 (match_operand:V8HI 1 "register_operand" "0")
4213 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4214 (parallel [(const_int 4) (const_int 12)
4215 (const_int 5) (const_int 13)
4216 (const_int 6) (const_int 14)
4217 (const_int 7) (const_int 15)])))]
4218 "TARGET_SSE2"
4219 "punpckhwd\t{%2, %0|%0, %2}"
4220 [(set_attr "type" "sselog")
4221 (set_attr "prefix_data16" "1")
4222 (set_attr "mode" "TI")])
4223
;; punpcklwd: interleave the low four halfwords of two V8HI vectors.
;; Result elements alternate operand 1 element i with operand 2
;; element i for i = 0..3 (operand 2 occupies indices 8..15 of the
;; V16HI concatenation).  Operand 1 is tied to the destination.
4224 (define_insn "sse2_punpcklwd"
4225 [(set (match_operand:V8HI 0 "register_operand" "=x")
4226 (vec_select:V8HI
4227 (vec_concat:V16HI
4228 (match_operand:V8HI 1 "register_operand" "0")
4229 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
4230 (parallel [(const_int 0) (const_int 8)
4231 (const_int 1) (const_int 9)
4232 (const_int 2) (const_int 10)
4233 (const_int 3) (const_int 11)])))]
4234 "TARGET_SSE2"
4235 "punpcklwd\t{%2, %0|%0, %2}"
4236 [(set_attr "type" "sselog")
4237 (set_attr "prefix_data16" "1")
4238 (set_attr "mode" "TI")])
4239
;; punpckhdq: interleave the high two doublewords of two V4SI vectors.
;; The result is { op1[2], op2[2], op1[3], op2[3] } (operand 2 occupies
;; indices 4..7 of the V8SI concatenation).  Operand 1 is tied to the
;; destination.
4240 (define_insn "sse2_punpckhdq"
4241 [(set (match_operand:V4SI 0 "register_operand" "=x")
4242 (vec_select:V4SI
4243 (vec_concat:V8SI
4244 (match_operand:V4SI 1 "register_operand" "0")
4245 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4246 (parallel [(const_int 2) (const_int 6)
4247 (const_int 3) (const_int 7)])))]
4248 "TARGET_SSE2"
4249 "punpckhdq\t{%2, %0|%0, %2}"
4250 [(set_attr "type" "sselog")
4251 (set_attr "prefix_data16" "1")
4252 (set_attr "mode" "TI")])
4253
;; punpckldq: interleave the low two doublewords of two V4SI vectors.
;; The result is { op1[0], op2[0], op1[1], op2[1] } (operand 2 occupies
;; indices 4..7 of the V8SI concatenation).  Operand 1 is tied to the
;; destination.
4254 (define_insn "sse2_punpckldq"
4255 [(set (match_operand:V4SI 0 "register_operand" "=x")
4256 (vec_select:V4SI
4257 (vec_concat:V8SI
4258 (match_operand:V4SI 1 "register_operand" "0")
4259 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
4260 (parallel [(const_int 0) (const_int 4)
4261 (const_int 1) (const_int 5)])))]
4262 "TARGET_SSE2"
4263 "punpckldq\t{%2, %0|%0, %2}"
4264 [(set_attr "type" "sselog")
4265 (set_attr "prefix_data16" "1")
4266 (set_attr "mode" "TI")])
4267
;; punpckhqdq: combine the high quadwords of two V2DI vectors.
;; The result is { op1[1], op2[1] } (operand 2 occupies indices 2..3 of
;; the V4DI concatenation).  Operand 1 is tied to the destination.
4268 (define_insn "sse2_punpckhqdq"
4269 [(set (match_operand:V2DI 0 "register_operand" "=x")
4270 (vec_select:V2DI
4271 (vec_concat:V4DI
4272 (match_operand:V2DI 1 "register_operand" "0")
4273 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4274 (parallel [(const_int 1)
4275 (const_int 3)])))]
4276 "TARGET_SSE2"
4277 "punpckhqdq\t{%2, %0|%0, %2}"
4278 [(set_attr "type" "sselog")
4279 (set_attr "prefix_data16" "1")
4280 (set_attr "mode" "TI")])
4281
;; punpcklqdq: combine the low quadwords of two V2DI vectors.
;; The result is { op1[0], op2[0] } (operand 2 occupies indices 2..3 of
;; the V4DI concatenation).  Operand 1 is tied to the destination.
4282 (define_insn "sse2_punpcklqdq"
4283 [(set (match_operand:V2DI 0 "register_operand" "=x")
4284 (vec_select:V2DI
4285 (vec_concat:V4DI
4286 (match_operand:V2DI 1 "register_operand" "0")
4287 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4288 (parallel [(const_int 0)
4289 (const_int 2)])))]
4290 "TARGET_SSE2"
4291 "punpcklqdq\t{%2, %0|%0, %2}"
4292 [(set_attr "type" "sselog")
4293 (set_attr "prefix_data16" "1")
4294 (set_attr "mode" "TI")])
4295
;; SSE4.1 pinsrb: insert the QI value in operand 2 (general register or
;; memory) into one byte of the V16QI vector in operand 1, which is tied
;; to the destination.  Operand 3 is the vec_merge mask and must be a
;; power of two in 1..32768; the output code converts it to an element
;; index with exact_log2 before printing it as the immediate.
4296 (define_insn "*sse4_1_pinsrb"
4297 [(set (match_operand:V16QI 0 "register_operand" "=x")
4298 (vec_merge:V16QI
4299 (vec_duplicate:V16QI
4300 (match_operand:QI 2 "nonimmediate_operand" "rm"))
4301 (match_operand:V16QI 1 "register_operand" "0")
4302 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
4303 "TARGET_SSE4_1"
4304 {
4305 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4306 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
4307 }
4308 [(set_attr "type" "sselog")
4309 (set_attr "prefix_extra" "1")
4310 (set_attr "mode" "TI")])
4311
;; SSE2 pinsrw: insert the HI value in operand 2 (general register or
;; memory) into one halfword of the V8HI vector in operand 1, which is
;; tied to the destination.  Operand 3 is the vec_merge mask and must be
;; a power of two in 1..128; the output code converts it to an element
;; index with exact_log2 before printing it as the immediate.
4312 (define_insn "*sse2_pinsrw"
4313 [(set (match_operand:V8HI 0 "register_operand" "=x")
4314 (vec_merge:V8HI
4315 (vec_duplicate:V8HI
4316 (match_operand:HI 2 "nonimmediate_operand" "rm"))
4317 (match_operand:V8HI 1 "register_operand" "0")
4318 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
4319 "TARGET_SSE2"
4320 {
4321 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4322 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
4323 }
4324 [(set_attr "type" "sselog")
4325 (set_attr "prefix_data16" "1")
4326 (set_attr "mode" "TI")])
4327
;; SSE4.1 pinsrd: insert the SI value in operand 2 (general register or
;; memory) into one element of the V4SI vector in operand 1, which is
;; tied to the destination.  Operand 3 is the vec_merge mask and must be
;; a power of two in 1..8; the output code converts it to an element
;; index with exact_log2 before printing it as the immediate.
4328 ;; It must come before sse2_loadld since it is preferred.
4329 (define_insn "*sse4_1_pinsrd"
4330 [(set (match_operand:V4SI 0 "register_operand" "=x")
4331 (vec_merge:V4SI
4332 (vec_duplicate:V4SI
4333 (match_operand:SI 2 "nonimmediate_operand" "rm"))
4334 (match_operand:V4SI 1 "register_operand" "0")
4335 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4336 "TARGET_SSE4_1"
4337 {
4338 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4339 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
4340 }
4341 [(set_attr "type" "sselog")
4342 (set_attr "prefix_extra" "1")
4343 (set_attr "mode" "TI")])
4344
;; SSE4.1 pinsrq: insert the DI value in operand 2 (general register or
;; memory) into one element of the V2DI vector in operand 1, which is
;; tied to the destination.  Operand 3 is the vec_merge mask and must be
;; 1 or 2; the output code converts it to an element index with
;; exact_log2 before printing it as the immediate.
;; pinsrq takes a 64-bit general register or memory source, so the
;; pattern is restricted to TARGET_64BIT, matching *sse4_1_pextrq.
4345 (define_insn "*sse4_1_pinsrq"
4346 [(set (match_operand:V2DI 0 "register_operand" "=x")
4347 (vec_merge:V2DI
4348 (vec_duplicate:V2DI
4349 (match_operand:DI 2 "nonimmediate_operand" "rm"))
4350 (match_operand:V2DI 1 "register_operand" "0")
4351 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
4352 "TARGET_SSE4_1 && TARGET_64BIT"
4353 {
4354 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
4355 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
4356 }
4357 [(set_attr "type" "sselog")
4358 (set_attr "prefix_extra" "1")
4359 (set_attr "mode" "TI")])
4360
;; SSE4.1 pextrb to a general register: select the byte of the V16QI
;; vector in operand 1 indexed by the constant operand 2 (0..15) and
;; zero-extend it to SImode in operand 0.
4361 (define_insn "*sse4_1_pextrb"
4362 [(set (match_operand:SI 0 "register_operand" "=r")
4363 (zero_extend:SI
4364 (vec_select:QI
4365 (match_operand:V16QI 1 "register_operand" "x")
4366 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
4367 "TARGET_SSE4_1"
4368 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4369 [(set_attr "type" "sselog")
4370 (set_attr "prefix_extra" "1")
4371 (set_attr "mode" "TI")])
4372
;; SSE4.1 pextrb to memory: store the byte of the V16QI vector in
;; operand 1 indexed by the constant operand 2 (0..15) into the QImode
;; memory location in operand 0.  No extension is involved.
4373 (define_insn "*sse4_1_pextrb_memory"
4374 [(set (match_operand:QI 0 "memory_operand" "=m")
4375 (vec_select:QI
4376 (match_operand:V16QI 1 "register_operand" "x")
4377 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
4378 "TARGET_SSE4_1"
4379 "pextrb\t{%2, %1, %0|%0, %1, %2}"
4380 [(set_attr "type" "sselog")
4381 (set_attr "prefix_extra" "1")
4382 (set_attr "mode" "TI")])
4383
;; SSE2 pextrw to a general register: select the halfword of the V8HI
;; vector in operand 1 indexed by the constant operand 2 (0..7) and
;; zero-extend it to SImode in operand 0.
4384 (define_insn "*sse2_pextrw"
4385 [(set (match_operand:SI 0 "register_operand" "=r")
4386 (zero_extend:SI
4387 (vec_select:HI
4388 (match_operand:V8HI 1 "register_operand" "x")
4389 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
4390 "TARGET_SSE2"
4391 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4392 [(set_attr "type" "sselog")
4393 (set_attr "prefix_data16" "1")
4394 (set_attr "mode" "TI")])
4395
;; SSE4.1 pextrw to memory: store the halfword of the V8HI vector in
;; operand 1 indexed by the constant operand 2 (0..7) into the HImode
;; memory location in operand 0.  Unlike the register form, this variant
;; requires TARGET_SSE4_1.
4396 (define_insn "*sse4_1_pextrw_memory"
4397 [(set (match_operand:HI 0 "memory_operand" "=m")
4398 (vec_select:HI
4399 (match_operand:V8HI 1 "register_operand" "x")
4400 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
4401 "TARGET_SSE4_1"
4402 "pextrw\t{%2, %1, %0|%0, %1, %2}"
4403 [(set_attr "type" "sselog")
4404 (set_attr "prefix_extra" "1")
4405 (set_attr "mode" "TI")])
4406
;; SSE4.1 pextrd: extract the element of the V4SI vector in operand 1
;; indexed by the constant operand 2 (0..3) into operand 0, which may be
;; a general register or memory.
4407 (define_insn "*sse4_1_pextrd"
4408 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
4409 (vec_select:SI
4410 (match_operand:V4SI 1 "register_operand" "x")
4411 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4412 "TARGET_SSE4_1"
4413 "pextrd\t{%2, %1, %0|%0, %1, %2}"
4414 [(set_attr "type" "sselog")
4415 (set_attr "prefix_extra" "1")
4416 (set_attr "mode" "TI")])
4417
;; SSE4.1 pextrq: extract the element of the V2DI vector in operand 1
;; indexed by the constant operand 2 (0 or 1) into operand 0, which may
;; be a general register or memory.  Requires both TARGET_SSE4_1 and
;; TARGET_64BIT.
4418 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
4419 (define_insn "*sse4_1_pextrq"
4420 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
4421 (vec_select:DI
4422 (match_operand:V2DI 1 "register_operand" "x")
4423 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
4424 "TARGET_SSE4_1 && TARGET_64BIT"
4425 "pextrq\t{%2, %1, %0|%0, %1, %2}"
4426 [(set_attr "type" "sselog")
4427 (set_attr "prefix_extra" "1")
4428 (set_attr "mode" "TI")])
4429
;; Expander for pshufd taking the shuffle control as one integer
;; constant (operand 2).  The constant is split into four 2-bit element
;; selectors (bits 0-1, 2-3, 4-5, 6-7) that are passed as separate
;; operands to sse2_pshufd_1, which carries the explicit vec_select.
4430 (define_expand "sse2_pshufd"
4431 [(match_operand:V4SI 0 "register_operand" "")
4432 (match_operand:V4SI 1 "nonimmediate_operand" "")
4433 (match_operand:SI 2 "const_int_operand" "")]
4434 "TARGET_SSE2"
4435 {
4436 int mask = INTVAL (operands[2]);
4437 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
4438 GEN_INT ((mask >> 0) & 3),
4439 GEN_INT ((mask >> 2) & 3),
4440 GEN_INT ((mask >> 4) & 3),
4441 GEN_INT ((mask >> 6) & 3)));
4442 DONE;
4443 })
4444
;; pshufd with the shuffle expressed as a vec_select: result element i
;; is the element of operand 1 selected by operand 2+i, each a constant
;; in 0..3.  The output code packs the four selectors back into a single
;; immediate (two bits each, operand 2 in the low bits), overwrites
;; operands[2] with it and prints pshufd.
4445 (define_insn "sse2_pshufd_1"
4446 [(set (match_operand:V4SI 0 "register_operand" "=x")
4447 (vec_select:V4SI
4448 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4449 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4450 (match_operand 3 "const_0_to_3_operand" "")
4451 (match_operand 4 "const_0_to_3_operand" "")
4452 (match_operand 5 "const_0_to_3_operand" "")])))]
4453 "TARGET_SSE2"
4454 {
4455 int mask = 0;
4456 mask |= INTVAL (operands[2]) << 0;
4457 mask |= INTVAL (operands[3]) << 2;
4458 mask |= INTVAL (operands[4]) << 4;
4459 mask |= INTVAL (operands[5]) << 6;
4460 operands[2] = GEN_INT (mask);
4461
4462 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
4463 }
4464 [(set_attr "type" "sselog1")
4465 (set_attr "prefix_data16" "1")
4466 (set_attr "mode" "TI")])
4467
;; Expander for pshuflw taking the shuffle control as one integer
;; constant (operand 2).  The constant is split into four 2-bit element
;; selectors (bits 0-1, 2-3, 4-5, 6-7) that are passed as separate
;; operands to sse2_pshuflw_1.
4468 (define_expand "sse2_pshuflw"
4469 [(match_operand:V8HI 0 "register_operand" "")
4470 (match_operand:V8HI 1 "nonimmediate_operand" "")
4471 (match_operand:SI 2 "const_int_operand" "")]
4472 "TARGET_SSE2"
4473 {
4474 int mask = INTVAL (operands[2]);
4475 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
4476 GEN_INT ((mask >> 0) & 3),
4477 GEN_INT ((mask >> 2) & 3),
4478 GEN_INT ((mask >> 4) & 3),
4479 GEN_INT ((mask >> 6) & 3)));
4480 DONE;
4481 })
4482
;; pshuflw with the shuffle expressed as a vec_select on V8HI: result
;; elements 0..3 are chosen from operand 1 by operands 2..5 (constants
;; in 0..3), while elements 4..7 are copied through unchanged.  The
;; output code packs the four selectors back into a single immediate
;; (two bits each, operand 2 in the low bits) and prints pshuflw.
4483 (define_insn "sse2_pshuflw_1"
4484 [(set (match_operand:V8HI 0 "register_operand" "=x")
4485 (vec_select:V8HI
4486 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4487 (parallel [(match_operand 2 "const_0_to_3_operand" "")
4488 (match_operand 3 "const_0_to_3_operand" "")
4489 (match_operand 4 "const_0_to_3_operand" "")
4490 (match_operand 5 "const_0_to_3_operand" "")
4491 (const_int 4)
4492 (const_int 5)
4493 (const_int 6)
4494 (const_int 7)])))]
4495 "TARGET_SSE2"
4496 {
4497 int mask = 0;
4498 mask |= INTVAL (operands[2]) << 0;
4499 mask |= INTVAL (operands[3]) << 2;
4500 mask |= INTVAL (operands[4]) << 4;
4501 mask |= INTVAL (operands[5]) << 6;
4502 operands[2] = GEN_INT (mask);
4503
4504 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
4505 }
4506 [(set_attr "type" "sselog")
4507 (set_attr "prefix_rep" "1")
4508 (set_attr "mode" "TI")])
4509
;; Expander for pshufhw taking the shuffle control as one integer
;; constant (operand 2).  The constant is split into four 2-bit
;; selectors, and 4 is added to each so that they index elements 4..7
;; of the V8HI source as sse2_pshufhw_1 expects.
4510 (define_expand "sse2_pshufhw"
4511 [(match_operand:V8HI 0 "register_operand" "")
4512 (match_operand:V8HI 1 "nonimmediate_operand" "")
4513 (match_operand:SI 2 "const_int_operand" "")]
4514 "TARGET_SSE2"
4515 {
4516 int mask = INTVAL (operands[2]);
4517 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
4518 GEN_INT (((mask >> 0) & 3) + 4),
4519 GEN_INT (((mask >> 2) & 3) + 4),
4520 GEN_INT (((mask >> 4) & 3) + 4),
4521 GEN_INT (((mask >> 6) & 3) + 4)));
4522 DONE;
4523 })
4524
;; pshufhw with the shuffle expressed as a vec_select on V8HI: result
;; elements 0..3 are copied through unchanged, while elements 4..7 are
;; chosen from operand 1 by operands 2..5 (constants in 4..7).  The
;; output code subtracts 4 from each selector, packs them into a single
;; immediate (two bits each, operand 2 in the low bits) and prints
;; pshufhw.
4525 (define_insn "sse2_pshufhw_1"
4526 [(set (match_operand:V8HI 0 "register_operand" "=x")
4527 (vec_select:V8HI
4528 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
4529 (parallel [(const_int 0)
4530 (const_int 1)
4531 (const_int 2)
4532 (const_int 3)
4533 (match_operand 2 "const_4_to_7_operand" "")
4534 (match_operand 3 "const_4_to_7_operand" "")
4535 (match_operand 4 "const_4_to_7_operand" "")
4536 (match_operand 5 "const_4_to_7_operand" "")])))]
4537 "TARGET_SSE2"
4538 {
4539 int mask = 0;
4540 mask |= (INTVAL (operands[2]) - 4) << 0;
4541 mask |= (INTVAL (operands[3]) - 4) << 2;
4542 mask |= (INTVAL (operands[4]) - 4) << 4;
4543 mask |= (INTVAL (operands[5]) - 4) << 6;
4544 operands[2] = GEN_INT (mask);
4545
4546 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
4547 }
4548 [(set_attr "type" "sselog")
4549 (set_attr "prefix_rep" "1")
4550 (set_attr "mode" "TI")])
4551
;; Expander that places the SI value in operand 1 into a V4SI vector
;; whose other elements are zero: the vec_merge (mask const_int 1) takes
;; one element from the duplicated scalar and the rest from operand 2,
;; which the preparation code sets to CONST0_RTX (V4SImode).
;; Note that the condition is TARGET_SSE despite the sse2_ name.
4552 (define_expand "sse2_loadd"
4553 [(set (match_operand:V4SI 0 "register_operand" "")
4554 (vec_merge:V4SI
4555 (vec_duplicate:V4SI
4556 (match_operand:SI 1 "nonimmediate_operand" ""))
4557 (match_dup 2)
4558 (const_int 1)))]
4559 "TARGET_SSE"
4560 "operands[2] = CONST0_RTX (V4SImode);")
4561
;; Merge the SI value in operand 2 into one element (mask const_int 1)
;; of operand 1, which is either a register or zero (reg_or_0_operand).
;; Four alternatives:
;;   1-2: movd from memory or a general register, operand 1 matching "C";
;;   3:   movss from memory, operand 1 matching "C";
;;   4:   movss from an SSE register, operand 1 tied to the destination.
;; The per-alternative "mode" attribute is TI, TI, V4SF, SF.
4562 (define_insn "sse2_loadld"
4563 [(set (match_operand:V4SI 0 "register_operand" "=Yt,Yi,x,x")
4564 (vec_merge:V4SI
4565 (vec_duplicate:V4SI
4566 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
4567 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
4568 (const_int 1)))]
4569 "TARGET_SSE"
4570 "@
4571 movd\t{%2, %0|%0, %2}
4572 movd\t{%2, %0|%0, %2}
4573 movss\t{%2, %0|%0, %2}
4574 movss\t{%2, %0|%0, %2}"
4575 [(set_attr "type" "ssemov")
4576 (set_attr "mode" "TI,TI,V4SF,SF")])
4577
;; Extract element 0 of a V4SI register into an SImode destination.
;; The insn itself has no assembly ("#") and is always split.  The split
;; runs after reload when TARGET_INTER_UNIT_MOVES holds, or the
;; destination is memory, or the destination is not a general register.
;; It becomes a plain SImode move from the source register, which the
;; preparation code re-creates as an SImode REG with the same number.
4578 (define_insn_and_split "sse2_stored"
4579 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
4580 (vec_select:SI
4581 (match_operand:V4SI 1 "register_operand" "x,Yi")
4582 (parallel [(const_int 0)])))]
4583 "TARGET_SSE"
4584 "#"
4585 "&& reload_completed
4586 && (TARGET_INTER_UNIT_MOVES
4587 || MEM_P (operands [0])
4588 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4589 [(set (match_dup 0) (match_dup 1))]
4590 {
4591 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
4592 })
4593
;; Expander that extracts element 0 of a V2DI register into a DImode
;; destination.  It has no preparation code; the template is emitted
;; as written and is meant to be matched by the storeq insns that follow.
4594 (define_expand "sse_storeq"
4595 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4596 (vec_select:DI
4597 (match_operand:V2DI 1 "register_operand" "")
4598 (parallel [(const_int 0)])))]
4599 "TARGET_SSE"
4600 "")
4601
;; 64-bit variant of the element-0 V2DI extract.  The condition rejects
;; the case where both operands are memory.  The first two alternatives
;; (SSE-register source) output "#" and are left to a splitter; the
;; third reads the low quadword straight from an offsettable memory
;; source into a general register with mov{q}.
4602 (define_insn "*sse2_storeq_rex64"
4603 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r,r")
4604 (vec_select:DI
4605 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
4606 (parallel [(const_int 0)])))]
4607 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4608 "@
4609 #
4610 #
4611 mov{q}\t{%1, %0|%0, %1}"
4612 [(set_attr "type" "*,*,imov")
4613 (set_attr "mode" "*,*,DI")])
4614
;; Element-0 V2DI extract from an SSE register into memory or an SSE
;; register, conditioned on TARGET_SSE.  It outputs "#", so it exists
;; only to be recognized and then split.
4615 (define_insn "*sse2_storeq"
4616 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
4617 (vec_select:DI
4618 (match_operand:V2DI 1 "register_operand" "x")
4619 (parallel [(const_int 0)])))]
4620 "TARGET_SSE"
4621 "#")
4622
;; Splitter for the element-0 V2DI extract.  It runs after reload when
;; TARGET_INTER_UNIT_MOVES holds, or the destination is memory, or the
;; destination is not a general register.  The extract becomes a plain
;; DImode move from the source register, which the preparation code
;; re-creates as a DImode REG with the same register number.
4623 (define_split
4624 [(set (match_operand:DI 0 "nonimmediate_operand" "")
4625 (vec_select:DI
4626 (match_operand:V2DI 1 "register_operand" "")
4627 (parallel [(const_int 0)])))]
4628 "TARGET_SSE
4629 && reload_completed
4630 && (TARGET_INTER_UNIT_MOVES
4631 || MEM_P (operands [0])
4632 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
4633 [(set (match_dup 0) (match_dup 1))]
4634 {
4635 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
4636 })
4637
;; Extract element 1 of a V2DI value on 64-bit targets.  Alternatives:
;;   0: SSE register -> memory, via movhps.
;;   1: in place (destination tied to the source), via psrldq by 8 bytes.
;;   2: offsettable memory -> SSE register, via movq from %H1.
;;   3: offsettable memory -> general register, via integer mov from %H1.
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the operand printer in i386.c.
;; Memory-to-memory is rejected by the insn condition.
4638 (define_insn "*vec_extractv2di_1_rex64"
4639 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
4640 (vec_select:DI
4641 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
4642 (parallel [(const_int 1)])))]
4643 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4644 "@
4645 movhps\t{%1, %0|%0, %1}
4646 psrldq\t{$8, %0|%0, 8}
4647 movq\t{%H1, %0|%0, %H1}
4648 mov{q}\t{%H1, %0|%0, %H1}"
4649 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
4650 (set_attr "memory" "*,none,*,*")
4651 (set_attr "mode" "V2SF,TI,TI,DI")])
4652
;; Extract element 1 of a V2DI value on 32-bit targets with SSE2.  Same as
;; the 64-bit variant minus the general-register alternative:
;;   0: SSE register -> memory (movhps)
;;   1: in place, destination tied to source (psrldq by 8 bytes)
;;   2: offsettable memory -> SSE register (movq from %H1)
4653 (define_insn "*vec_extractv2di_1_sse2"
4654 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4655 (vec_select:DI
4656 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
4657 (parallel [(const_int 1)])))]
4658 "!TARGET_64BIT
4659 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4660 "@
4661 movhps\t{%1, %0|%0, %1}
4662 psrldq\t{$8, %0|%0, 8}
4663 movq\t{%H1, %0|%0, %H1}"
4664 [(set_attr "type" "ssemov,sseishft,ssemov")
4665 (set_attr "memory" "*,none,*")
4666 (set_attr "mode" "V2SF,TI,TI")])
4667
;; Extract element 1 of a V2DI value when only SSE1 is available, using
;; single-precision move instructions only:
;;   0: SSE register -> memory (movhps)
;;   1: SSE register -> SSE register (movhlps)
;;   2: offsettable memory -> SSE register (movlps from %H1)
4668 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
4669 (define_insn "*vec_extractv2di_1_sse"
4670 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
4671 (vec_select:DI
4672 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
4673 (parallel [(const_int 1)])))]
4674 "!TARGET_SSE2 && TARGET_SSE
4675 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4676 "@
4677 movhps\t{%1, %0|%0, %1}
4678 movhlps\t{%1, %0|%0, %1}
4679 movlps\t{%H1, %0|%0, %H1}"
4680 [(set_attr "type" "ssemov")
4681 (set_attr "mode" "V2SF,V4SF,V2SF")])
4682
;; Broadcast an SImode value held in an SSE register to all four V4SI
;; elements.  Alternative 0 uses pshufd with selector 0; alternative 1 ties
;; the destination to the source and uses shufps with selector 0.
;; NOTE(review): "Yt" is presumably the SSE2-only SSE register constraint,
;; which would make alternative 1 the SSE1 fallback -- confirm in the i386
;; constraint definitions.
4683 (define_insn "*vec_dupv4si"
4684 [(set (match_operand:V4SI 0 "register_operand" "=Yt,x")
4685 (vec_duplicate:V4SI
4686 (match_operand:SI 1 "register_operand" " Yt,0")))]
4687 "TARGET_SSE"
4688 "@
4689 pshufd\t{$0, %1, %0|%0, %1, 0}
4690 shufps\t{$0, %0, %0|%0, %0, 0}"
4691 [(set_attr "type" "sselog1")
4692 (set_attr "mode" "TI,V4SF")])
4693
;; Broadcast a DImode value to both V2DI elements.  Both alternatives tie
;; the destination to the source register, so the templates name only %0:
;; punpcklqdq for the "Yt" alternative, movlhps for the plain "x" one.
;; Source and destination are the same operand, so no separate AT&T/Intel
;; operand ordering is needed in the templates.
4694 (define_insn "*vec_dupv2di"
4695 [(set (match_operand:V2DI 0 "register_operand" "=Yt,x")
4696 (vec_duplicate:V2DI
4697 (match_operand:DI 1 "register_operand" " 0 ,0")))]
4698 "TARGET_SSE"
4699 "@
4700 punpcklqdq\t%0, %0
4701 movlhps\t%0, %0"
4702 [(set_attr "type" "sselog1,ssemov")
4703 (set_attr "mode" "TI,V4SF")])
4704
;; Build a V2SI value from two SImode values when SSE2 is available.
;; Alternatives 0-1 target SSE registers ("Yt"), 2-3 target MMX registers
;; ("*y").  In each pair, the first form interleaves operand 2 into the
;; destination (tied to operand 1) with punpckldq; the second handles a
;; zero operand 2 ("C") by loading operand 1 with movd.
4705 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4706 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4707 ;; alternatives pretty much forces the MMX alternative to be chosen.
4708 (define_insn "*sse2_concatv2si"
4709 [(set (match_operand:V2SI 0 "register_operand" "=Yt, Yt,*y,*y")
4710 (vec_concat:V2SI
4711 (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
4712 (match_operand:SI 2 "reg_or_0_operand" " Yt,C ,*y, C")))]
4713 "TARGET_SSE2"
4714 "@
4715 punpckldq\t{%2, %0|%0, %2}
4716 movd\t{%1, %0|%0, %1}
4717 punpckldq\t{%2, %0|%0, %2}
4718 movd\t{%1, %0|%0, %1}"
4719 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4720 (set_attr "mode" "TI,TI,DI,DI")])
4721
;; Build a V2SI value from two SImode values using SSE1-level instructions.
;; Alternatives:
;;   0: SSE register, operand 1 tied to destination, unpcklps with operand 2.
;;   1: SSE register, operand 2 is zero ("C"), movss load of operand 1
;;      from memory.
;;   2: MMX register, operand 1 tied to destination, punpckldq.
;;   3: MMX register, operand 2 is zero, movd of operand 1.
4722 (define_insn "*sse1_concatv2si"
4723 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
4724 (vec_concat:V2SI
4725 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
4726 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
4727 "TARGET_SSE"
4728 "@
4729 unpcklps\t{%2, %0|%0, %2}
4730 movss\t{%1, %0|%0, %1}
4731 punpckldq\t{%2, %0|%0, %2}
4732 movd\t{%1, %0|%0, %1}"
4733 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4734 (set_attr "mode" "V4SF,V4SF,DI,DI")])
4735
;; Concatenate two V2SI halves into a V4SI register.  Operand 1 is always
;; tied to the destination; operand 2 is merged in with punpcklqdq ("Yt"
;; register), movlhps (SSE register) or movhps (memory).
4736 (define_insn "*vec_concatv4si_1"
4737 [(set (match_operand:V4SI 0 "register_operand" "=Yt,x,x")
4738 (vec_concat:V4SI
4739 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
4740 (match_operand:V2SI 2 "nonimmediate_operand" " Yt,x,m")))]
4741 "TARGET_SSE"
4742 "@
4743 punpcklqdq\t{%2, %0|%0, %2}
4744 movlhps\t{%2, %0|%0, %2}
4745 movhps\t{%2, %0|%0, %2}"
4746 [(set_attr "type" "sselog,ssemov,ssemov")
4747 (set_attr "mode" "TI,V4SF,V2SF")])
4748
;; Concatenate two DImode values into a V2DI register on 32-bit targets.
;; Alternatives:
;;   0: operand 1 from memory, operand 2 zero ("C")  -> movq
;;   1: operand 1 from an MMX register, operand 2 zero -> movq2dq
;;   2: operand 1 tied to dest, operand 2 "Yt" reg   -> punpcklqdq
;;   3: operand 1 tied to dest, operand 2 SSE reg    -> movlhps
;;   4: operand 1 tied to dest, operand 2 memory     -> movhps
;;   5: operand 2 tied to dest, operand 1 memory     -> movlps
4749 (define_insn "vec_concatv2di"
4750 [(set (match_operand:V2DI 0 "register_operand" "=Yt,?Yt,Yt,x,x,x")
4751 (vec_concat:V2DI
4752 (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
4753 (match_operand:DI 2 "vector_move_operand" " C, C,Yt,x,m,0")))]
4754 "!TARGET_64BIT && TARGET_SSE"
4755 "@
4756 movq\t{%1, %0|%0, %1}
4757 movq2dq\t{%1, %0|%0, %1}
4758 punpcklqdq\t{%2, %0|%0, %2}
4759 movlhps\t{%2, %0|%0, %2}
4760 movhps\t{%2, %0|%0, %2}
4761 movlps\t{%1, %0|%0, %1}"
4762 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4763 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
4764
;; Concatenate two DImode values into a V2DI register on 64-bit targets.
;; Same alternatives as the 32-bit pattern, plus alternative 1, which takes
;; operand 1 from a general register ("r") into a "Yi" destination with movq:
;;   0: memory, zero        -> movq
;;   1: general reg, zero   -> movq
;;   2: MMX reg, zero       -> movq2dq
;;   3: tied, "Yt" reg      -> punpcklqdq
;;   4: tied, SSE reg       -> movlhps
;;   5: tied, memory        -> movhps
;;   6: memory, tied op 2   -> movlps
4765 (define_insn "*vec_concatv2di_rex"
4766 [(set (match_operand:V2DI 0 "register_operand" "=Yt,Yi,!Yt,Yt,x,x,x")
4767 (vec_concat:V2DI
4768 (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m")
4769 (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Yt,x,m,0")))]
4770 "TARGET_64BIT"
4771 "@
4772 movq\t{%1, %0|%0, %1}
4773 movq\t{%1, %0|%0, %1}
4774 movq2dq\t{%1, %0|%0, %1}
4775 punpcklqdq\t{%2, %0|%0, %2}
4776 movlhps\t{%2, %0|%0, %2}
4777 movhps\t{%2, %0|%0, %2}
4778 movlps\t{%1, %0|%0, %1}"
4779 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4780 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
4781
;; Set one element of a V2DI register.  Operand 0 is the vector, operand 1
;; the DImode value, operand 2 a constant element index.  All work is
;; delegated to ix86_expand_vector_set; no RTL from the template is emitted
;; (DONE).  NOTE(review): the leading `false' is presumably the "MMX
;; allowed" flag -- confirm in i386.c.
4782 (define_expand "vec_setv2di"
4783 [(match_operand:V2DI 0 "register_operand" "")
4784 (match_operand:DI 1 "register_operand" "")
4785 (match_operand 2 "const_int_operand" "")]
4786 "TARGET_SSE"
4787 {
4788 ix86_expand_vector_set (false, operands[0], operands[1],
4789 INTVAL (operands[2]));
4790 DONE;
4791 })
4792
;; Extract one element of a V2DI register.  Operand 0 is the DImode
;; destination, operand 1 the vector, operand 2 a constant element index.
;; Expansion is delegated entirely to ix86_expand_vector_extract.
4793 (define_expand "vec_extractv2di"
4794 [(match_operand:DI 0 "register_operand" "")
4795 (match_operand:V2DI 1 "register_operand" "")
4796 (match_operand 2 "const_int_operand" "")]
4797 "TARGET_SSE"
4798 {
4799 ix86_expand_vector_extract (false, operands[0], operands[1],
4800 INTVAL (operands[2]));
4801 DONE;
4802 })
4803
;; Initialize a V2DI register (operand 0) from the initializer passed as
;; operand 1, which has no mode or predicate here.  Expansion is delegated
;; entirely to ix86_expand_vector_init.
4804 (define_expand "vec_initv2di"
4805 [(match_operand:V2DI 0 "register_operand" "")
4806 (match_operand 1 "" "")]
4807 "TARGET_SSE"
4808 {
4809 ix86_expand_vector_init (false, operands[0], operands[1]);
4810 DONE;
4811 })
4812
;; Set one element of a V4SI register.  Operand 0 is the vector, operand 1
;; the SImode value, operand 2 a constant element index.  Expansion is
;; delegated entirely to ix86_expand_vector_set.
4813 (define_expand "vec_setv4si"
4814 [(match_operand:V4SI 0 "register_operand" "")
4815 (match_operand:SI 1 "register_operand" "")
4816 (match_operand 2 "const_int_operand" "")]
4817 "TARGET_SSE"
4818 {
4819 ix86_expand_vector_set (false, operands[0], operands[1],
4820 INTVAL (operands[2]));
4821 DONE;
4822 })
4823
;; Extract one element of a V4SI register.  Operand 0 is the SImode
;; destination, operand 1 the vector, operand 2 a constant element index.
;; Expansion is delegated entirely to ix86_expand_vector_extract.
4824 (define_expand "vec_extractv4si"
4825 [(match_operand:SI 0 "register_operand" "")
4826 (match_operand:V4SI 1 "register_operand" "")
4827 (match_operand 2 "const_int_operand" "")]
4828 "TARGET_SSE"
4829 {
4830 ix86_expand_vector_extract (false, operands[0], operands[1],
4831 INTVAL (operands[2]));
4832 DONE;
4833 })
4834
;; Initialize a V4SI register (operand 0) from the initializer passed as
;; operand 1.  Expansion is delegated entirely to ix86_expand_vector_init.
4835 (define_expand "vec_initv4si"
4836 [(match_operand:V4SI 0 "register_operand" "")
4837 (match_operand 1 "" "")]
4838 "TARGET_SSE"
4839 {
4840 ix86_expand_vector_init (false, operands[0], operands[1]);
4841 DONE;
4842 })
4843
;; Set one element of a V8HI register.  Operand 0 is the vector, operand 1
;; the HImode value, operand 2 a constant element index.  Expansion is
;; delegated entirely to ix86_expand_vector_set.
4844 (define_expand "vec_setv8hi"
4845 [(match_operand:V8HI 0 "register_operand" "")
4846 (match_operand:HI 1 "register_operand" "")
4847 (match_operand 2 "const_int_operand" "")]
4848 "TARGET_SSE"
4849 {
4850 ix86_expand_vector_set (false, operands[0], operands[1],
4851 INTVAL (operands[2]));
4852 DONE;
4853 })
4854
;; Extract one element of a V8HI register.  Operand 0 is the HImode
;; destination, operand 1 the vector, operand 2 a constant element index.
;; Expansion is delegated entirely to ix86_expand_vector_extract.
4855 (define_expand "vec_extractv8hi"
4856 [(match_operand:HI 0 "register_operand" "")
4857 (match_operand:V8HI 1 "register_operand" "")
4858 (match_operand 2 "const_int_operand" "")]
4859 "TARGET_SSE"
4860 {
4861 ix86_expand_vector_extract (false, operands[0], operands[1],
4862 INTVAL (operands[2]));
4863 DONE;
4864 })
4865
;; Initialize a V8HI register (operand 0) from the initializer passed as
;; operand 1.  Expansion is delegated entirely to ix86_expand_vector_init.
4866 (define_expand "vec_initv8hi"
4867 [(match_operand:V8HI 0 "register_operand" "")
4868 (match_operand 1 "" "")]
4869 "TARGET_SSE"
4870 {
4871 ix86_expand_vector_init (false, operands[0], operands[1]);
4872 DONE;
4873 })
4874
;; Set one element of a V16QI register.  Operand 0 is the vector, operand 1
;; the QImode value, operand 2 a constant element index.  Expansion is
;; delegated entirely to ix86_expand_vector_set.
4875 (define_expand "vec_setv16qi"
4876 [(match_operand:V16QI 0 "register_operand" "")
4877 (match_operand:QI 1 "register_operand" "")
4878 (match_operand 2 "const_int_operand" "")]
4879 "TARGET_SSE"
4880 {
4881 ix86_expand_vector_set (false, operands[0], operands[1],
4882 INTVAL (operands[2]));
4883 DONE;
4884 })
4885
;; Extract one element of a V16QI register.  Operand 0 is the QImode
;; destination, operand 1 the vector, operand 2 a constant element index.
;; Expansion is delegated entirely to ix86_expand_vector_extract.
4886 (define_expand "vec_extractv16qi"
4887 [(match_operand:QI 0 "register_operand" "")
4888 (match_operand:V16QI 1 "register_operand" "")
4889 (match_operand 2 "const_int_operand" "")]
4890 "TARGET_SSE"
4891 {
4892 ix86_expand_vector_extract (false, operands[0], operands[1],
4893 INTVAL (operands[2]));
4894 DONE;
4895 })
4896
;; Initialize a V16QI register (operand 0) from the initializer passed as
;; operand 1.  Expansion is delegated entirely to ix86_expand_vector_init.
4897 (define_expand "vec_initv16qi"
4898 [(match_operand:V16QI 0 "register_operand" "")
4899 (match_operand 1 "" "")]
4900 "TARGET_SSE"
4901 {
4902 ix86_expand_vector_init (false, operands[0], operands[1]);
4903 DONE;
4904 })
4905
;; Widen V16QI elements into a V8HI result ("unpacku", "hi" variant).
;; Dispatches to the SSE4.1 helper when TARGET_SSE4_1 is set, otherwise to
;; the generic SSE helper; both receive the flags (true, true).
;; NOTE(review): across these expanders the first flag is true for the "u"
;; names and the second is true for the "hi" names, so they are presumably
;; unsigned_p and high_p -- confirm in i386.c.
4906 (define_expand "vec_unpacku_hi_v16qi"
4907 [(match_operand:V8HI 0 "register_operand" "")
4908 (match_operand:V16QI 1 "register_operand" "")]
4909 "TARGET_SSE2"
4910 {
4911 if (TARGET_SSE4_1)
4912 ix86_expand_sse4_unpack (operands, true, true);
4913 else
4914 ix86_expand_sse_unpack (operands, true, true);
4915 DONE;
4916 })
4917
;; Widen V16QI elements into a V8HI result ("unpacks", "hi" variant).
;; Dispatches to the SSE4.1 helper when TARGET_SSE4_1 is set, otherwise to
;; the generic SSE helper; both receive the flags (false, true).
4918 (define_expand "vec_unpacks_hi_v16qi"
4919 [(match_operand:V8HI 0 "register_operand" "")
4920 (match_operand:V16QI 1 "register_operand" "")]
4921 "TARGET_SSE2"
4922 {
4923 if (TARGET_SSE4_1)
4924 ix86_expand_sse4_unpack (operands, false, true);
4925 else
4926 ix86_expand_sse_unpack (operands, false, true);
4927 DONE;
4928 })
4929
;; Widen V16QI elements into a V8HI result ("unpacku", "lo" variant).
;; Dispatches to the SSE4.1 helper when TARGET_SSE4_1 is set, otherwise to
;; the generic SSE helper; both receive the flags (true, false).
4930 (define_expand "vec_unpacku_lo_v16qi"
4931 [(match_operand:V8HI 0 "register_operand" "")
4932 (match_operand:V16QI 1 "register_operand" "")]
4933 "TARGET_SSE2"
4934 {
4935 if (TARGET_SSE4_1)
4936 ix86_expand_sse4_unpack (operands, true, false);
4937 else
4938 ix86_expand_sse_unpack (operands, true, false);
4939 DONE;
4940 })
4941
;; Widen V16QI elements into a V8HI result ("unpacks", "lo" variant).
;; Dispatches to the SSE4.1 helper when TARGET_SSE4_1 is set, otherwise to
;; the generic SSE helper; both receive the flags (false, false).
4942 (define_expand "vec_unpacks_lo_v16qi"
4943 [(match_operand:V8HI 0 "register_operand" "")
4944 (match_operand:V16QI 1 "register_operand" "")]
4945 "TARGET_SSE2"
4946 {
4947 if (TARGET_SSE4_1)
4948 ix86_expand_sse4_unpack (operands, false, false);
4949 else
4950 ix86_expand_sse_unpack (operands, false, false);
4951 DONE;
4952 })
4953
;; Widen V8HI elements into a V4SI result ("unpacku", "hi" variant).
;; Dispatches to the SSE4.1 helper when TARGET_SSE4_1 is set, otherwise to
;; the generic SSE helper; both receive the flags (true, true).
4954 (define_expand "vec_unpacku_hi_v8hi"
4955 [(match_operand:V4SI 0 "register_operand" "")
4956 (match_operand:V8HI 1 "register_operand" "")]
4957 "TARGET_SSE2"
4958 {
4959 if (TARGET_SSE4_1)
4960 ix86_expand_sse4_unpack (operands, true, true);
4961 else
4962 ix86_expand_sse_unpack (operands, true, true);
4963 DONE;
4964 })
4965
;; Widen V8HI elements into a V4SI result ("unpacks", "hi" variant).
;; Dispatches to the SSE4.1 helper when TARGET_SSE4_1 is set, otherwise to
;; the generic SSE helper; both receive the flags (false, true).
4966 (define_expand "vec_unpacks_hi_v8hi"
4967 [(match_operand:V4SI 0 "register_operand" "")
4968 (match_operand:V8HI 1 "register_operand" "")]
4969 "TARGET_SSE2"
4970 {
4971 if (TARGET_SSE4_1)
4972 ix86_expand_sse4_unpack (operands, false, true);
4973 else
4974 ix86_expand_sse_unpack (operands, false, true);
4975 DONE;
4976 })
4977
;; Widen V8HI elements into a V4SI result ("unpacku", "lo" variant).
;; Dispatches to the SSE4.1 helper when TARGET_SSE4_1 is set, otherwise to
;; the generic SSE helper; both receive the flags (true, false).
4978 (define_expand "vec_unpacku_lo_v8hi"
4979 [(match_operand:V4SI 0 "register_operand" "")
4980 (match_operand:V8HI 1 "register_operand" "")]
4981 "TARGET_SSE2"
4982 {
4983 if (TARGET_SSE4_1)
4984 ix86_expand_sse4_unpack (operands, true, false);
4985 else
4986 ix86_expand_sse_unpack (operands, true, false);
4987 DONE;
4988 })
4989
;; Widen V8HI elements into a V4SI result ("unpacks", "lo" variant).
;; Dispatches to the SSE4.1 helper when TARGET_SSE4_1 is set, otherwise to
;; the generic SSE helper; both receive the flags (false, false).
4990 (define_expand "vec_unpacks_lo_v8hi"
4991 [(match_operand:V4SI 0 "register_operand" "")
4992 (match_operand:V8HI 1 "register_operand" "")]
4993 "TARGET_SSE2"
4994 {
4995 if (TARGET_SSE4_1)
4996 ix86_expand_sse4_unpack (operands, false, false);
4997 else
4998 ix86_expand_sse_unpack (operands, false, false);
4999 DONE;
5000 })
5001
;; Widen V4SI elements into a V2DI result ("unpacku", "hi" variant).
;; Dispatches to the SSE4.1 helper when TARGET_SSE4_1 is set, otherwise to
;; the generic SSE helper; both receive the flags (true, true).
5002 (define_expand "vec_unpacku_hi_v4si"
5003 [(match_operand:V2DI 0 "register_operand" "")
5004 (match_operand:V4SI 1 "register_operand" "")]
5005 "TARGET_SSE2"
5006 {
5007 if (TARGET_SSE4_1)
5008 ix86_expand_sse4_unpack (operands, true, true);
5009 else
5010 ix86_expand_sse_unpack (operands, true, true);
5011 DONE;
5012 })
5013
;; Widen V4SI elements into a V2DI result ("unpacks", "hi" variant).
;; Dispatches to the SSE4.1 helper when TARGET_SSE4_1 is set, otherwise to
;; the generic SSE helper; both receive the flags (false, true).
5014 (define_expand "vec_unpacks_hi_v4si"
5015 [(match_operand:V2DI 0 "register_operand" "")
5016 (match_operand:V4SI 1 "register_operand" "")]
5017 "TARGET_SSE2"
5018 {
5019 if (TARGET_SSE4_1)
5020 ix86_expand_sse4_unpack (operands, false, true);
5021 else
5022 ix86_expand_sse_unpack (operands, false, true);
5023 DONE;
5024 })
5025
;; Widen V4SI elements into a V2DI result ("unpacku", "lo" variant).
;; Dispatches to the SSE4.1 helper when TARGET_SSE4_1 is set, otherwise to
;; the generic SSE helper; both receive the flags (true, false).
5026 (define_expand "vec_unpacku_lo_v4si"
5027 [(match_operand:V2DI 0 "register_operand" "")
5028 (match_operand:V4SI 1 "register_operand" "")]
5029 "TARGET_SSE2"
5030 {
5031 if (TARGET_SSE4_1)
5032 ix86_expand_sse4_unpack (operands, true, false);
5033 else
5034 ix86_expand_sse_unpack (operands, true, false);
5035 DONE;
5036 })
5037
;; Widen V4SI elements into a V2DI result ("unpacks", "lo" variant).
;; Dispatches to the SSE4.1 helper when TARGET_SSE4_1 is set, otherwise to
;; the generic SSE helper; both receive the flags (false, false).
5038 (define_expand "vec_unpacks_lo_v4si"
5039 [(match_operand:V2DI 0 "register_operand" "")
5040 (match_operand:V4SI 1 "register_operand" "")]
5041 "TARGET_SSE2"
5042 {
5043 if (TARGET_SSE4_1)
5044 ix86_expand_sse4_unpack (operands, false, false);
5045 else
5046 ix86_expand_sse_unpack (operands, false, false);
5047 DONE;
5048 })
5049
5050 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5051 ;;
5052 ;; Miscellaneous
5053 ;;
5054 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5055
;; Unsigned rounding average of two V16QI vectors (pavgb).  Each result
;; byte is (a + b + 1) >> 1, described by zero-extending both inputs to
;; V16HI, adding them, adding a vector of ones, shifting right by one and
;; truncating back to V16QI.  Operand 1 is commutative with operand 2 ("%")
;; and tied to the destination.
;; The rounding constant is an operand of the inner (plus:V16HI ...), so it
;; carries the wide V16HI mode to keep the RTL mode-consistent.
5056 (define_insn "sse2_uavgv16qi3"
5057 [(set (match_operand:V16QI 0 "register_operand" "=x")
5058 (truncate:V16QI
5059 (lshiftrt:V16HI
5060 (plus:V16HI
5061 (plus:V16HI
5062 (zero_extend:V16HI
5063 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
5064 (zero_extend:V16HI
5065 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
5066 (const_vector:V16HI [(const_int 1) (const_int 1)
5067 (const_int 1) (const_int 1)
5068 (const_int 1) (const_int 1)
5069 (const_int 1) (const_int 1)
5070 (const_int 1) (const_int 1)
5071 (const_int 1) (const_int 1)
5072 (const_int 1) (const_int 1)
5073 (const_int 1) (const_int 1)]))
5074 (const_int 1))))]
5075 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
5076 "pavgb\t{%2, %0|%0, %2}"
5077 [(set_attr "type" "sseiadd")
5078 (set_attr "prefix_data16" "1")
5079 (set_attr "mode" "TI")])
5080
;; Unsigned rounding average of two V8HI vectors (pavgw).  Each result
;; word is (a + b + 1) >> 1, described by zero-extending both inputs to
;; V8SI, adding them, adding a vector of ones, shifting right by one and
;; truncating back to V8HI.  Operand 1 is commutative with operand 2 ("%")
;; and tied to the destination.
;; The rounding constant is an operand of the inner (plus:V8SI ...), so it
;; carries the wide V8SI mode to keep the RTL mode-consistent.
5081 (define_insn "sse2_uavgv8hi3"
5082 [(set (match_operand:V8HI 0 "register_operand" "=x")
5083 (truncate:V8HI
5084 (lshiftrt:V8SI
5085 (plus:V8SI
5086 (plus:V8SI
5087 (zero_extend:V8SI
5088 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5089 (zero_extend:V8SI
5090 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5091 (const_vector:V8SI [(const_int 1) (const_int 1)
5092 (const_int 1) (const_int 1)
5093 (const_int 1) (const_int 1)
5094 (const_int 1) (const_int 1)]))
5095 (const_int 1))))]
5096 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
5097 "pavgw\t{%2, %0|%0, %2}"
5098 [(set_attr "type" "sseiadd")
5099 (set_attr "prefix_data16" "1")
5100 (set_attr "mode" "TI")])
5101
;; psadbw on XMM registers, modelled as an opaque UNSPEC_PSADBW that takes
;; two V16QI inputs and yields a V2DI result.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
5102 ;; The correct representation for this is absolutely enormous, and
5103 ;; surely not generally useful.
5104 (define_insn "sse2_psadbw"
5105 [(set (match_operand:V2DI 0 "register_operand" "=x")
5106 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
5107 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
5108 UNSPEC_PSADBW))]
5109 "TARGET_SSE2"
5110 "psadbw\t{%2, %0|%0, %2}"
5111 [(set_attr "type" "sseiadd")
5112 (set_attr "prefix_data16" "1")
5113 (set_attr "mode" "TI")])
5114
;; movmskps: produce an SImode value in a general register from a V4SF SSE
;; register, modelled as UNSPEC_MOVMSK.
5115 (define_insn "sse_movmskps"
5116 [(set (match_operand:SI 0 "register_operand" "=r")
5117 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
5118 UNSPEC_MOVMSK))]
5119 "TARGET_SSE"
5120 "movmskps\t{%1, %0|%0, %1}"
5121 [(set_attr "type" "ssecvt")
5122 (set_attr "mode" "V4SF")])
5123
;; movmskpd: produce an SImode value in a general register from a V2DF SSE
;; register, modelled as UNSPEC_MOVMSK.  Requires SSE2.
5124 (define_insn "sse2_movmskpd"
5125 [(set (match_operand:SI 0 "register_operand" "=r")
5126 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
5127 UNSPEC_MOVMSK))]
5128 "TARGET_SSE2"
5129 "movmskpd\t{%1, %0|%0, %1}"
5130 [(set_attr "type" "ssecvt")
5131 (set_attr "mode" "V2DF")])
5132
;; pmovmskb: produce an SImode value in a general register from a V16QI SSE
;; register, modelled as UNSPEC_MOVMSK.  Requires SSE2.
5133 (define_insn "sse2_pmovmskb"
5134 [(set (match_operand:SI 0 "register_operand" "=r")
5135 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
5136 UNSPEC_MOVMSK))]
5137 "TARGET_SSE2"
5138 "pmovmskb\t{%1, %0|%0, %1}"
5139 [(set_attr "type" "ssecvt")
5140 (set_attr "prefix_data16" "1")
5141 (set_attr "mode" "SI")])
5142
;; Named expander for maskmovdqu.  The V16QI memory destination (operand 0)
;; is also an input of the UNSPEC_MASKMOV via match_dup, alongside the two
;; V16QI register operands.  The preparation code is empty; the emitted RTL
;; is matched by one of the two insns with the same unspec, chosen by
;; address width.
5143 (define_expand "sse2_maskmovdqu"
5144 [(set (match_operand:V16QI 0 "memory_operand" "")
5145 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5146 (match_operand:V16QI 2 "register_operand" "x")
5147 (match_dup 0)]
5148 UNSPEC_MASKMOV))]
5149 "TARGET_SSE2"
5150 "")
5151
;; 32-bit maskmovdqu.  The store address is an SImode register constrained
;; to "D"; it does not appear in the assembler template, so the instruction
;; addresses memory through that register implicitly.  The old memory
;; contents are an input of the unspec.
;; NOTE(review): operand 1 is presumably the data and operand 2 the byte
;; mask -- confirm against the builtin expansion in i386.c.
5152 (define_insn "*sse2_maskmovdqu"
5153 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
5154 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5155 (match_operand:V16QI 2 "register_operand" "x")
5156 (mem:V16QI (match_dup 0))]
5157 UNSPEC_MASKMOV))]
5158 "TARGET_SSE2 && !TARGET_64BIT"
5159 ;; @@@ check ordering of operands in intel/nonintel syntax
5160 "maskmovdqu\t{%2, %1|%1, %2}"
5161 [(set_attr "type" "ssecvt")
5162 (set_attr "prefix_data16" "1")
5163 (set_attr "mode" "TI")])
5164
;; 64-bit maskmovdqu.  Identical to the 32-bit insn except that the
;; implicit address register (constraint "D") is DImode.
5165 (define_insn "*sse2_maskmovdqu_rex64"
5166 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
5167 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
5168 (match_operand:V16QI 2 "register_operand" "x")
5169 (mem:V16QI (match_dup 0))]
5170 UNSPEC_MASKMOV))]
5171 "TARGET_SSE2 && TARGET_64BIT"
5172 ;; @@@ check ordering of operands in intel/nonintel syntax
5173 "maskmovdqu\t{%2, %1|%1, %2}"
5174 [(set_attr "type" "ssecvt")
5175 (set_attr "prefix_data16" "1")
5176 (set_attr "mode" "TI")])
5177
;; ldmxcsr: load the MXCSR register from an SImode memory operand.  Modelled
;; as unspec_volatile so it is never deleted or moved as dead code.
5178 (define_insn "sse_ldmxcsr"
5179 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
5180 UNSPECV_LDMXCSR)]
5181 "TARGET_SSE"
5182 "ldmxcsr\t%0"
5183 [(set_attr "type" "sse")
5184 (set_attr "memory" "load")])
5185
;; stmxcsr: store the MXCSR register to an SImode memory operand.  The
;; source is an unspec_volatile with a dummy (const_int 0) argument.
5186 (define_insn "sse_stmxcsr"
5187 [(set (match_operand:SI 0 "memory_operand" "=m")
5188 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
5189 "TARGET_SSE"
5190 "stmxcsr\t%0"
5191 [(set_attr "type" "sse")
5192 (set_attr "memory" "store")])
5193
;; Expander for sfence.  Operand 0 is not supplied by the caller: the
;; preparation code creates a volatile BLKmode MEM whose address is a
;; SCRATCH, and the fence is expressed as that memory being set from an
;; unspec of itself.  Available with SSE or with TARGET_3DNOW_A.
5194 (define_expand "sse_sfence"
5195 [(set (match_dup 0)
5196 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5197 "TARGET_SSE || TARGET_3DNOW_A"
5198 {
5199 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5200 MEM_VOLATILE_P (operands[0]) = 1;
5201 })
5202
;; Matching insn for the sfence RTL: a BLKmode operand (no predicate) set
;; from UNSPEC_SFENCE of itself.  Emits a bare "sfence"; the "memory"
;; attribute is "unknown".
5203 (define_insn "*sse_sfence"
5204 [(set (match_operand:BLK 0 "" "")
5205 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
5206 "TARGET_SSE || TARGET_3DNOW_A"
5207 "sfence"
5208 [(set_attr "type" "sse")
5209 (set_attr "memory" "unknown")])
5210
;; clflush on the address given by operand 0.  The operand is an address
;; ("p" constraint), printed with the %a modifier.  Modelled as
;; unspec_volatile.
5211 (define_insn "sse2_clflush"
5212 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
5213 UNSPECV_CLFLUSH)]
5214 "TARGET_SSE2"
5215 "clflush\t%a0"
5216 [(set_attr "type" "sse")
5217 (set_attr "memory" "unknown")])
5218
;; Expander for mfence.  As with sfence, operand 0 is created here as a
;; volatile BLKmode MEM at a SCRATCH address, and the fence is expressed as
;; that memory being set from UNSPEC_MFENCE of itself.
5219 (define_expand "sse2_mfence"
5220 [(set (match_dup 0)
5221 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5222 "TARGET_SSE2"
5223 {
5224 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5225 MEM_VOLATILE_P (operands[0]) = 1;
5226 })
5227
;; Matching insn for the mfence RTL: a BLKmode operand set from
;; UNSPEC_MFENCE of itself.  Emits a bare "mfence".
5228 (define_insn "*sse2_mfence"
5229 [(set (match_operand:BLK 0 "" "")
5230 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
5231 "TARGET_SSE2"
5232 "mfence"
5233 [(set_attr "type" "sse")
5234 (set_attr "memory" "unknown")])
5235
;; Expander for lfence.  As with sfence, operand 0 is created here as a
;; volatile BLKmode MEM at a SCRATCH address, and the fence is expressed as
;; that memory being set from UNSPEC_LFENCE of itself.
5236 (define_expand "sse2_lfence"
5237 [(set (match_dup 0)
5238 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5239 "TARGET_SSE2"
5240 {
5241 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
5242 MEM_VOLATILE_P (operands[0]) = 1;
5243 })
5244
;; Matching insn for the lfence RTL: a BLKmode operand set from
;; UNSPEC_LFENCE of itself.  Emits a bare "lfence".
5245 (define_insn "*sse2_lfence"
5246 [(set (match_operand:BLK 0 "" "")
5247 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
5248 "TARGET_SSE2"
5249 "lfence"
5250 [(set_attr "type" "sse")
5251 (set_attr "memory" "unknown")])
5252
;; mwait.  Both SImode inputs are pinned to fixed registers by their
;; constraints ("a" and "c"), so the template emits the mnemonic with no
;; explicit operands.  The insn length is given directly as 3 bytes.
5253 (define_insn "sse3_mwait"
5254 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5255 (match_operand:SI 1 "register_operand" "c")]
5256 UNSPECV_MWAIT)]
5257 "TARGET_SSE3"
5258 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
5259 ;; Since 32bit register operands are implicitly zero extended to 64bit,
5260 ;; we only need to set up 32bit registers.
5261 "mwait"
5262 [(set_attr "length" "3")])
5263
;; 32-bit monitor.  The three SImode inputs are pinned to fixed registers
;; by the "a", "c" and "d" constraints; unlike the 64-bit variant, the
;; template prints all three operands explicitly.
5264 (define_insn "sse3_monitor"
5265 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
5266 (match_operand:SI 1 "register_operand" "c")
5267 (match_operand:SI 2 "register_operand" "d")]
5268 UNSPECV_MONITOR)]
5269 "TARGET_SSE3 && !TARGET_64BIT"
5270 "monitor\t%0, %1, %2"
5271 [(set_attr "length" "3")])
5272
;; 64-bit monitor.  Operand 0 (constraint "a") is DImode; operands 1 and 2
;; remain SImode for the reason given in the comment below.  The mnemonic
;; is emitted with no explicit operands.
5273 (define_insn "sse3_monitor64"
5274 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
5275 (match_operand:SI 1 "register_operand" "c")
5276 (match_operand:SI 2 "register_operand" "d")]
5277 UNSPECV_MONITOR)]
5278 "TARGET_SSE3 && TARGET_64BIT"
5279 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
5280 ;; RCX and RDX are used. Since 32bit register operands are implicitly
5281 ;; zero extended to 64bit, we only need to set up 32bit registers.
5282 "monitor"
5283 [(set_attr "length" "3")])
5284
5285 ;; SSSE3
;; phaddw on XMM registers: horizontal add of adjacent HImode pairs.
;; The V8HI result is, in vec_concat order, the four pair sums
;; (0+1, 2+3, 4+5, 6+7) of operand 1 followed by the same four pair sums of
;; operand 2.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.
5286 (define_insn "ssse3_phaddwv8hi3"
5287 [(set (match_operand:V8HI 0 "register_operand" "=x")
5288 (vec_concat:V8HI
5289 (vec_concat:V4HI
5290 (vec_concat:V2HI
5291 (plus:HI
5292 (vec_select:HI
5293 (match_operand:V8HI 1 "register_operand" "0")
5294 (parallel [(const_int 0)]))
5295 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5296 (plus:HI
5297 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5298 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5299 (vec_concat:V2HI
5300 (plus:HI
5301 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5302 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5303 (plus:HI
5304 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5305 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5306 (vec_concat:V4HI
5307 (vec_concat:V2HI
5308 (plus:HI
5309 (vec_select:HI
5310 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5311 (parallel [(const_int 0)]))
5312 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5313 (plus:HI
5314 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5315 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5316 (vec_concat:V2HI
5317 (plus:HI
5318 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5319 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5320 (plus:HI
5321 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5322 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5323 "TARGET_SSSE3"
5324 "phaddw\t{%2, %0|%0, %2}"
5325 [(set_attr "type" "sseiadd")
5326 (set_attr "prefix_data16" "1")
5327 (set_attr "prefix_extra" "1")
5328 (set_attr "mode" "TI")])
5329
;; phaddw on 64-bit vectors ("y" register constraint): horizontal add of
;; adjacent HImode pairs.  The V4HI result is, in vec_concat order, the pair
;; sums (0+1, 2+3) of operand 1 followed by the pair sums (0+1, 2+3) of
;; operand 2.  Operand 1 is tied to the destination.  Unlike the XMM form,
;; no prefix_data16 attribute is set.
5330 (define_insn "ssse3_phaddwv4hi3"
5331 [(set (match_operand:V4HI 0 "register_operand" "=y")
5332 (vec_concat:V4HI
5333 (vec_concat:V2HI
5334 (plus:HI
5335 (vec_select:HI
5336 (match_operand:V4HI 1 "register_operand" "0")
5337 (parallel [(const_int 0)]))
5338 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5339 (plus:HI
5340 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5341 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5342 (vec_concat:V2HI
5343 (plus:HI
5344 (vec_select:HI
5345 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5346 (parallel [(const_int 0)]))
5347 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5348 (plus:HI
5349 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5350 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5351 "TARGET_SSSE3"
5352 "phaddw\t{%2, %0|%0, %2}"
5353 [(set_attr "type" "sseiadd")
5354 (set_attr "prefix_extra" "1")
5355 (set_attr "mode" "DI")])
5356
;; phaddd on XMM registers: horizontal add of adjacent SImode pairs.
;; The V4SI result is, in vec_concat order, the pair sums (0+1, 2+3) of
;; operand 1 followed by the pair sums (0+1, 2+3) of operand 2.  Operand 1
;; is tied to the destination; operand 2 may be a register or memory.
5357 (define_insn "ssse3_phadddv4si3"
5358 [(set (match_operand:V4SI 0 "register_operand" "=x")
5359 (vec_concat:V4SI
5360 (vec_concat:V2SI
5361 (plus:SI
5362 (vec_select:SI
5363 (match_operand:V4SI 1 "register_operand" "0")
5364 (parallel [(const_int 0)]))
5365 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5366 (plus:SI
5367 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5368 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5369 (vec_concat:V2SI
5370 (plus:SI
5371 (vec_select:SI
5372 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5373 (parallel [(const_int 0)]))
5374 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5375 (plus:SI
5376 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5377 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5378 "TARGET_SSSE3"
5379 "phaddd\t{%2, %0|%0, %2}"
5380 [(set_attr "type" "sseiadd")
5381 (set_attr "prefix_data16" "1")
5382 (set_attr "prefix_extra" "1")
5383 (set_attr "mode" "TI")])
5384
;; phaddd on 64-bit vectors ("y" register constraint).  The V2SI result is
;; (op1[0] + op1[1]) followed by (op2[0] + op2[1]).  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.
5385 (define_insn "ssse3_phadddv2si3"
5386 [(set (match_operand:V2SI 0 "register_operand" "=y")
5387 (vec_concat:V2SI
5388 (plus:SI
5389 (vec_select:SI
5390 (match_operand:V2SI 1 "register_operand" "0")
5391 (parallel [(const_int 0)]))
5392 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5393 (plus:SI
5394 (vec_select:SI
5395 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5396 (parallel [(const_int 0)]))
5397 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5398 "TARGET_SSSE3"
5399 "phaddd\t{%2, %0|%0, %2}"
5400 [(set_attr "type" "sseiadd")
5401 (set_attr "prefix_extra" "1")
5402 (set_attr "mode" "DI")])
5403
;; phaddsw on XMM registers: horizontal signed-saturating add (ss_plus) of
;; adjacent HImode pairs.  The V8HI result is, in vec_concat order, the four
;; saturated pair sums (0+1, 2+3, 4+5, 6+7) of operand 1 followed by those
;; of operand 2.  Operand 1 is tied to the destination.
5404 (define_insn "ssse3_phaddswv8hi3"
5405 [(set (match_operand:V8HI 0 "register_operand" "=x")
5406 (vec_concat:V8HI
5407 (vec_concat:V4HI
5408 (vec_concat:V2HI
5409 (ss_plus:HI
5410 (vec_select:HI
5411 (match_operand:V8HI 1 "register_operand" "0")
5412 (parallel [(const_int 0)]))
5413 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5414 (ss_plus:HI
5415 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5416 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5417 (vec_concat:V2HI
5418 (ss_plus:HI
5419 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5420 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5421 (ss_plus:HI
5422 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5423 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5424 (vec_concat:V4HI
5425 (vec_concat:V2HI
5426 (ss_plus:HI
5427 (vec_select:HI
5428 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5429 (parallel [(const_int 0)]))
5430 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5431 (ss_plus:HI
5432 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5433 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5434 (vec_concat:V2HI
5435 (ss_plus:HI
5436 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5437 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5438 (ss_plus:HI
5439 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5440 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5441 "TARGET_SSSE3"
5442 "phaddsw\t{%2, %0|%0, %2}"
5443 [(set_attr "type" "sseiadd")
5444 (set_attr "prefix_data16" "1")
5445 (set_attr "prefix_extra" "1")
5446 (set_attr "mode" "TI")])
5447
;; phaddsw on 64-bit vectors ("y" register constraint): horizontal
;; signed-saturating add (ss_plus) of adjacent HImode pairs.  The V4HI
;; result is the saturated pair sums (0+1, 2+3) of operand 1 followed by
;; those of operand 2.  Operand 1 is tied to the destination.
5448 (define_insn "ssse3_phaddswv4hi3"
5449 [(set (match_operand:V4HI 0 "register_operand" "=y")
5450 (vec_concat:V4HI
5451 (vec_concat:V2HI
5452 (ss_plus:HI
5453 (vec_select:HI
5454 (match_operand:V4HI 1 "register_operand" "0")
5455 (parallel [(const_int 0)]))
5456 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5457 (ss_plus:HI
5458 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5459 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5460 (vec_concat:V2HI
5461 (ss_plus:HI
5462 (vec_select:HI
5463 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5464 (parallel [(const_int 0)]))
5465 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5466 (ss_plus:HI
5467 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5468 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5469 "TARGET_SSSE3"
5470 "phaddsw\t{%2, %0|%0, %2}"
5471 [(set_attr "type" "sseiadd")
5472 (set_attr "prefix_extra" "1")
5473 (set_attr "mode" "DI")])
5474
;; phsubw on XMM registers: horizontal subtract of adjacent HImode pairs.
;; Each pair yields (even element - odd element).  The V8HI result is, in
;; vec_concat order, the four differences (0-1, 2-3, 4-5, 6-7) of operand 1
;; followed by those of operand 2.  Operand 1 is tied to the destination.
5475 (define_insn "ssse3_phsubwv8hi3"
5476 [(set (match_operand:V8HI 0 "register_operand" "=x")
5477 (vec_concat:V8HI
5478 (vec_concat:V4HI
5479 (vec_concat:V2HI
5480 (minus:HI
5481 (vec_select:HI
5482 (match_operand:V8HI 1 "register_operand" "0")
5483 (parallel [(const_int 0)]))
5484 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5485 (minus:HI
5486 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5487 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5488 (vec_concat:V2HI
5489 (minus:HI
5490 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
5491 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
5492 (minus:HI
5493 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
5494 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
5495 (vec_concat:V4HI
5496 (vec_concat:V2HI
5497 (minus:HI
5498 (vec_select:HI
5499 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5500 (parallel [(const_int 0)]))
5501 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5502 (minus:HI
5503 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5504 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
5505 (vec_concat:V2HI
5506 (minus:HI
5507 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
5508 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
5509 (minus:HI
5510 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
5511 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
5512 "TARGET_SSSE3"
5513 "phsubw\t{%2, %0|%0, %2}"
5514 [(set_attr "type" "sseiadd")
5515 (set_attr "prefix_data16" "1")
5516 (set_attr "prefix_extra" "1")
5517 (set_attr "mode" "TI")])
5518
;; phsubw on 64-bit vectors ("y" register constraint): horizontal subtract
;; of adjacent HImode pairs, each as (even element - odd element).  The
;; V4HI result is the differences (0-1, 2-3) of operand 1 followed by those
;; of operand 2.  Operand 1 is tied to the destination.
5519 (define_insn "ssse3_phsubwv4hi3"
5520 [(set (match_operand:V4HI 0 "register_operand" "=y")
5521 (vec_concat:V4HI
5522 (vec_concat:V2HI
5523 (minus:HI
5524 (vec_select:HI
5525 (match_operand:V4HI 1 "register_operand" "0")
5526 (parallel [(const_int 0)]))
5527 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
5528 (minus:HI
5529 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
5530 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
5531 (vec_concat:V2HI
5532 (minus:HI
5533 (vec_select:HI
5534 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
5535 (parallel [(const_int 0)]))
5536 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
5537 (minus:HI
5538 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
5539 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
5540 "TARGET_SSSE3"
5541 "phsubw\t{%2, %0|%0, %2}"
5542 [(set_attr "type" "sseiadd")
5543 (set_attr "prefix_extra" "1")
5544 (set_attr "mode" "DI")])
5545
;; phsubd on XMM registers: horizontal subtract of adjacent SImode pairs,
;; each as (even element - odd element).  The V4SI result is the
;; differences (0-1, 2-3) of operand 1 followed by those of operand 2.
;; Operand 1 is tied to the destination.
5546 (define_insn "ssse3_phsubdv4si3"
5547 [(set (match_operand:V4SI 0 "register_operand" "=x")
5548 (vec_concat:V4SI
5549 (vec_concat:V2SI
5550 (minus:SI
5551 (vec_select:SI
5552 (match_operand:V4SI 1 "register_operand" "0")
5553 (parallel [(const_int 0)]))
5554 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5555 (minus:SI
5556 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
5557 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
5558 (vec_concat:V2SI
5559 (minus:SI
5560 (vec_select:SI
5561 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5562 (parallel [(const_int 0)]))
5563 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
5564 (minus:SI
5565 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
5566 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
5567 "TARGET_SSSE3"
5568 "phsubd\t{%2, %0|%0, %2}"
5569 [(set_attr "type" "sseiadd")
5570 (set_attr "prefix_data16" "1")
5571 (set_attr "prefix_extra" "1")
5572 (set_attr "mode" "TI")])
5573
;; phsubd on 64-bit vectors ("y" register constraint).  The V2SI result is
;; (op1[0] - op1[1]) followed by (op2[0] - op2[1]).  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.
5574 (define_insn "ssse3_phsubdv2si3"
5575 [(set (match_operand:V2SI 0 "register_operand" "=y")
5576 (vec_concat:V2SI
5577 (minus:SI
5578 (vec_select:SI
5579 (match_operand:V2SI 1 "register_operand" "0")
5580 (parallel [(const_int 0)]))
5581 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
5582 (minus:SI
5583 (vec_select:SI
5584 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
5585 (parallel [(const_int 0)]))
5586 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
5587 "TARGET_SSSE3"
5588 "phsubd\t{%2, %0|%0, %2}"
5589 [(set_attr "type" "sseiadd")
5590 (set_attr "prefix_extra" "1")
5591 (set_attr "mode" "DI")])
5592
;; phsubsw, 128-bit form: saturating (ss_minus) difference of each adjacent
;; pair of HImode elements.  The four pairs of operand 1 (tied to the
;; destination) come first, followed by the four pairs of operand 2.
(define_insn "ssse3_phsubswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; Pairs taken from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pairs taken from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phsubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
5636
;; phsubsw, 64-bit ("y" register) form: saturating difference of each
;; adjacent HImode pair; operand 1's two pairs first, then operand 2's.
(define_insn "ssse3_phsubswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Pairs taken from operand 1.
          (vec_concat:V2HI
            (ss_minus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ss_minus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairs taken from operand 2.
          (vec_concat:V2HI
            (ss_minus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ss_minus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "DI")])
5663
;; pmaddubsw, 128-bit form.  The even-indexed bytes of operand 1
;; (zero-extended) are multiplied by the even-indexed bytes of operand 2
;; (sign-extended), likewise for the odd-indexed bytes, and the two V8HI
;; products are added with signed saturation.
;;
;; Every vec_select below picks eight QImode elements, so its mode is V8QI.
;;
;; Operand 1 is zero-extended while operand 2 is sign-extended, so the two
;; inputs are not interchangeable: the pattern must not carry the "%"
;; commutativity marker, and operand 1 has to be the register tied to the
;; destination.
(define_insn "ssse3_pmaddubswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (ss_plus:V8HI
          ;; Products of the even-indexed bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 1 "register_operand" "0")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)
                           (const_int 8)
                           (const_int 10)
                           (const_int 12)
                           (const_int 14)])))
            (sign_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)
                           (const_int 8)
                           (const_int 10)
                           (const_int 12)
                           (const_int 14)]))))
          ;; Products of the odd-indexed bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)
                           (const_int 9)
                           (const_int 11)
                           (const_int 13)
                           (const_int 15)])))
            (sign_extend:V8HI
              (vec_select:V8QI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)
                           (const_int 9)
                           (const_int 11)
                           (const_int 13)
                           (const_int 15)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
5717
;; pmaddubsw, 64-bit ("y" register) form.  The even-indexed bytes of
;; operand 1 (zero-extended) are multiplied by the even-indexed bytes of
;; operand 2 (sign-extended), likewise for the odd-indexed bytes, and the
;; two V4HI products are added with signed saturation.
;;
;; Every vec_select below picks four QImode elements, so its mode is V4QI.
;;
;; Operand 1 is zero-extended while operand 2 is sign-extended, so the two
;; inputs are not interchangeable: the pattern must not carry the "%"
;; commutativity marker, and operand 1 has to be the register tied to the
;; destination.
(define_insn "ssse3_pmaddubswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (ss_plus:V4HI
          ;; Products of the even-indexed bytes.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 1 "register_operand" "0")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)])))
            (sign_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)]))))
          ;; Products of the odd-indexed bytes.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)])))
            (sign_extend:V4HI
              (vec_select:V4QI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "DI")])
5754
;; pmulhrsw, 128-bit form: truncate (((op1 * op2) >> 14) + 1) >> 1 back to
;; HImode elements, with the product computed in V8SI after sign-extending
;; both inputs.
;;
;; The rounding constant is an operand of the V8SI addition, so it is
;; written as a V8SI vector of ones; both operands of a plus must have the
;; mode of the plus itself.
(define_insn "ssse3_pmulhrswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (lshiftrt:V8SI
                (mult:V8SI
                  (sign_extend:V8SI
                    (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
                  (sign_extend:V8SI
                    (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
                (const_int 14))
              (const_vector:V8SI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
5778
;; pmulhrsw, 64-bit ("y" register) form: truncate
;; (((op1 * op2) >> 14) + 1) >> 1 back to HImode elements, with the product
;; computed in V4SI after sign-extending both inputs.
;;
;; The rounding constant is an operand of the V4SI addition, so it is
;; written as a V4SI vector of ones; both operands of a plus must have the
;; mode of the plus itself.
(define_insn "ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (truncate:V4HI
          (lshiftrt:V4SI
            (plus:V4SI
              (lshiftrt:V4SI
                (mult:V4SI
                  (sign_extend:V4SI
                    (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
                  (sign_extend:V4SI
                    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
                (const_int 14))
              (const_vector:V4SI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "DI")])
5799
;; pshufb, 128-bit form.  Modelled as an opaque UNSPEC_PSHUFB of the
;; destination-tied operand 1 and the register-or-memory operand 2.
(define_insn "ssse3_pshufbv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "0")
           (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
          UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
5811
;; pshufb, 64-bit ("y" register) form.  Modelled as an opaque
;; UNSPEC_PSHUFB of the destination-tied operand 1 and operand 2.
(define_insn "ssse3_pshufbv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (unspec:V8QI
          [(match_operand:V8QI 1 "register_operand" "0")
           (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "DI")])
5822
;; psignb/psignw/psignd, 128-bit forms: one pattern per mode in
;; SSEMODE124, with the mnemonic suffix supplied by <ssevecsize>.
;; Modelled as an opaque UNSPEC_PSIGN of operands 1 and 2.
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (unspec:SSEMODE124
          [(match_operand:SSEMODE124 1 "register_operand" "0")
           (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "psign<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
5834
;; psign, 64-bit ("y" register) forms: one pattern per mode in MMXMODEI,
;; with the mnemonic suffix supplied by <mmxvecsize> (both are defined
;; outside this part of the file).  Modelled as an opaque UNSPEC_PSIGN.
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (unspec:MMXMODEI
          [(match_operand:MMXMODEI 1 "register_operand" "0")
           (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "psign<mmxvecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "DI")])
5845
;; palignr, 128-bit form, modelled as an opaque UNSPEC_PALIGNR.
(define_insn "ssse3_palignrti"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (unspec:TI
          [(match_operand:TI 1 "register_operand" "0")
           (match_operand:TI 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
          UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* The RTL operand is divided by 8 before being printed as the
     immediate; presumably it is a bit count (the predicate name suggests
     a multiple of 8) while the instruction takes a byte count.  */
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
5861
;; palignr, 64-bit ("y" register) form, modelled as an opaque
;; UNSPEC_PALIGNR.
(define_insn "ssse3_palignrdi"
  [(set (match_operand:DI 0 "register_operand" "=y")
        (unspec:DI
          [(match_operand:DI 1 "register_operand" "0")
           (match_operand:DI 2 "nonimmediate_operand" "ym")
           (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
          UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* The RTL operand is divided by 8 before being printed as the
     immediate; presumably it is a bit count (the predicate name suggests
     a multiple of 8) while the instruction takes a byte count.  */
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "DI")])
5876
;; pabsb/pabsw/pabsd, 128-bit forms: element-wise absolute value for each
;; mode in SSEMODE124.  The source is not tied to the destination.
(define_insn "abs<mode>2"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (abs:SSEMODE124
          (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSSE3"
  "pabs<ssevecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
5886
;; pabs, 64-bit ("y" register) forms: element-wise absolute value for each
;; mode in MMXMODEI; the mnemonic suffix comes from <mmxvecsize>.
(define_insn "abs<mode>2"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (abs:MMXMODEI
          (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
  "TARGET_SSSE3"
  "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "DI")])
5895
5896 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5897 ;;
5898 ;; AMD SSE4A instructions
5899 ;;
5900 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5901
;; movntsd from a vector register: element 0 of the V2DF operand is stored
;; to a DFmode memory destination, wrapped in UNSPEC_MOVNT.
(define_insn "sse4a_vmmovntv2df"
  [(set (match_operand:DF 0 "memory_operand" "=m")
        (unspec:DF
          [(vec_select:DF
             (match_operand:V2DF 1 "register_operand" "x")
             (parallel [(const_int 0)]))]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movntsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "DF")])
5912
;; movntsd from a scalar DFmode value held in an SSE register, stored to
;; memory and wrapped in UNSPEC_MOVNT.
(define_insn "sse4a_movntdf"
  [(set (match_operand:DF 0 "memory_operand" "=m")
        (unspec:DF
          [(match_operand:DF 1 "register_operand" "x")]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movntsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "DF")])
5921
;; movntss from a vector register: element 0 of the V4SF operand is stored
;; to an SFmode memory destination, wrapped in UNSPEC_MOVNT.
(define_insn "sse4a_vmmovntv4sf"
  [(set (match_operand:SF 0 "memory_operand" "=m")
        (unspec:SF
          [(vec_select:SF
             (match_operand:V4SF 1 "register_operand" "x")
             (parallel [(const_int 0)]))]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movntss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "SF")])
5932
;; movntss from a scalar SFmode value held in an SSE register, stored to
;; memory and wrapped in UNSPEC_MOVNT.
(define_insn "sse4a_movntsf"
  [(set (match_operand:SF 0 "memory_operand" "=m")
        (unspec:SF
          [(match_operand:SF 1 "register_operand" "x")]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movntss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "SF")])
5941
;; extrq, immediate form: operand 1 is tied to the destination and
;; operands 2 and 3 are the two integer constants printed as immediates.
;; Modelled as an opaque UNSPEC_EXTRQI.
(define_insn "sse4a_extrqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand 2 "const_int_operand" "")
           (match_operand 3 "const_int_operand" "")]
          UNSPEC_EXTRQI))]
  "TARGET_SSE4A"
  "extrq\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
5953
;; extrq, register form: operand 1 is tied to the destination and the
;; second input is a V16QI register.  Modelled as an opaque UNSPEC_EXTRQ.
(define_insn "sse4a_extrq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V16QI 2 "register_operand" "x")]
          UNSPEC_EXTRQ))]
  "TARGET_SSE4A"
  "extrq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
5964
;; insertq, immediate form: operand 1 is tied to the destination, operand 2
;; is the second register input, and operands 3 and 4 are the two integer
;; constants printed as immediates.  Modelled as an opaque UNSPEC_INSERTQI.
(define_insn "sse4a_insertqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "register_operand" "x")
           (match_operand 3 "const_int_operand" "")
           (match_operand 4 "const_int_operand" "")]
          UNSPEC_INSERTQI))]
  "TARGET_SSE4A"
  "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
  [(set_attr "type" "sseins")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "TI")])
5977
;; insertq, register form: operand 1 is tied to the destination and
;; operand 2 is the second register input.  Modelled as an opaque
;; UNSPEC_INSERTQ.
(define_insn "sse4a_insertq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "register_operand" "x")]
          UNSPEC_INSERTQ))]
  "TARGET_SSE4A"
  "insertq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseins")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "TI")])
5988
5989 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5990 ;;
5991 ;; Intel SSE4.1 instructions
5992 ;;
5993 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5994
;; blendpd: vec_merge of operand 2 and the destination-tied operand 1,
;; with the element mask given by the immediate operand 3.
(define_insn "sse4_1_blendpd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")
          (match_operand:V2DF 1 "register_operand" "0")
          (match_operand:SI 3 "const_0_to_3_operand" "n")))]
  "TARGET_SSE4_1"
  "blendpd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V2DF")])
6006
;; blendps: vec_merge of operand 2 and the destination-tied operand 1,
;; with the element mask given by the immediate operand 3.
(define_insn "sse4_1_blendps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")
          (match_operand:V4SF 1 "register_operand" "0")
          (match_operand:SI 3 "const_0_to_15_operand" "n")))]
  "TARGET_SSE4_1"
  "blendps\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V4SF")])
6018
;; blendvpd, modelled as an opaque UNSPEC_BLENDV.  The selector (operand 3)
;; uses the "Y0" constraint, and the other operands use *_not_xmm0
;; predicates; presumably this keeps them out of the register reserved for
;; the selector.
(define_insn "sse4_1_blendvpd"
  [(set (match_operand:V2DF 0 "reg_not_xmm0_operand" "=x")
        (unspec:V2DF
          [(match_operand:V2DF 1 "reg_not_xmm0_operand" "0")
           (match_operand:V2DF 2 "nonimm_not_xmm0_operand" "xm")
           (match_operand:V2DF 3 "register_operand" "Y0")]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "blendvpd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V2DF")])
6030
;; blendvps, modelled as an opaque UNSPEC_BLENDV.  The selector (operand 3)
;; uses the "Y0" constraint, and the other operands use *_not_xmm0
;; predicates; presumably this keeps them out of the register reserved for
;; the selector.
(define_insn "sse4_1_blendvps"
  [(set (match_operand:V4SF 0 "reg_not_xmm0_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "reg_not_xmm0_operand" "0")
           (match_operand:V4SF 2 "nonimm_not_xmm0_operand" "xm")
           (match_operand:V4SF 3 "register_operand" "Y0")]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "blendvps\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V4SF")])
6042
;; dppd, modelled as an opaque UNSPEC_DP of two V2DF inputs and an 8-bit
;; immediate.  Operands 1 and 2 are marked commutative ("%").
(define_insn "sse4_1_dppd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (unspec:V2DF
          [(match_operand:V2DF 1 "nonimmediate_operand" "%0")
           (match_operand:V2DF 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_DP))]
  "TARGET_SSE4_1"
  "dppd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemul")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V2DF")])
6054
;; dpps, modelled as an opaque UNSPEC_DP of two V4SF inputs and an 8-bit
;; immediate.  Operands 1 and 2 are marked commutative ("%").
(define_insn "sse4_1_dpps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "nonimmediate_operand" "%0")
           (match_operand:V4SF 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_DP))]
  "TARGET_SSE4_1"
  "dpps\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemul")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V4SF")])
6066
;; movntdqa: load of a V2DI memory operand into an SSE register, wrapped
;; in UNSPEC_MOVNTDQA.
(define_insn "sse4_1_movntdqa"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "memory_operand" "m")]
          UNSPEC_MOVNTDQA))]
  "TARGET_SSE4_1"
  "movntdqa\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6076
;; mpsadbw, modelled as an opaque UNSPEC_MPSADBW of the destination-tied
;; operand 1, operand 2 and an 8-bit immediate.
(define_insn "sse4_1_mpsadbw"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "0")
           (match_operand:V16QI 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_MPSADBW))]
  "TARGET_SSE4_1"
  "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6088
;; packusdw: unsigned-saturating truncation (us_truncate) of two V4SI
;; inputs to V4HI each, concatenated with operand 1's half first.
(define_insn "sse4_1_packusdw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (us_truncate:V4HI
            (match_operand:V4SI 1 "register_operand" "0"))
          (us_truncate:V4HI
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE4_1"
  "packusdw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6101
;; pblendvb, modelled as an opaque UNSPEC_BLENDV.  The selector (operand 3)
;; uses the "Y0" constraint, and the other operands use *_not_xmm0
;; predicates; presumably this keeps them out of the register reserved for
;; the selector.
(define_insn "sse4_1_pblendvb"
  [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
        (unspec:V16QI
          [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
           (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
           (match_operand:V16QI 3 "register_operand" "Y0")]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "pblendvb\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6113
;; pblendw: vec_merge of operand 2 and the destination-tied operand 1,
;; with the element mask given by the 8-bit immediate operand 3.
(define_insn "sse4_1_pblendw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_merge:V8HI
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")
          (match_operand:V8HI 1 "register_operand" "0")
          (match_operand:SI 3 "const_0_to_255_operand" "n")))]
  "TARGET_SSE4_1"
  "pblendw\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6125
;; phminposuw, modelled as an opaque UNSPEC_PHMINPOSUW of a single V8HI
;; register-or-memory input.
(define_insn "sse4_1_phminposuw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (unspec:V8HI
          [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
          UNSPEC_PHMINPOSUW))]
  "TARGET_SSE4_1"
  "phminposuw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6135
;; pmovsxbw, register source: sign-extend elements 0-7 of a V16QI register
;; to V8HI.
(define_insn "sse4_1_extendv8qiv8hi2"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (sign_extend:V8HI
          (vec_select:V8QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1"
  "pmovsxbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6154
;; pmovsxbw, narrow source: the V8QI register-or-memory operand is wrapped
;; in a vec_duplicate to V16QI, from which elements 0-7 are selected and
;; sign-extended to V8HI.
(define_insn "*sse4_1_extendv8qiv8hi2"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (sign_extend:V8HI
          (vec_select:V8QI
            (vec_duplicate:V16QI
              (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1"
  "pmovsxbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6174
;; pmovsxbd, register source: sign-extend elements 0-3 of a V16QI register
;; to V4SI.
(define_insn "sse4_1_extendv4qiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (sign_extend:V4SI
          (vec_select:V4QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "pmovsxbd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6189
;; pmovsxbd, narrow source: the V4QI register-or-memory operand is wrapped
;; in a vec_duplicate to V16QI, from which elements 0-3 are selected and
;; sign-extended to V4SI.
(define_insn "*sse4_1_extendv4qiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (sign_extend:V4SI
          (vec_select:V4QI
            (vec_duplicate:V16QI
              (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "pmovsxbd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6205
;; pmovsxbq, register source: sign-extend elements 0-1 of a V16QI register
;; to V2DI.
(define_insn "sse4_1_extendv2qiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (sign_extend:V2DI
          (vec_select:V2QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovsxbq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6218
;; pmovsxbq, narrow source: the V2QI register-or-memory operand is wrapped
;; in a vec_duplicate to V16QI, from which elements 0-1 are selected and
;; sign-extended to V2DI.
(define_insn "*sse4_1_extendv2qiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (sign_extend:V2DI
          (vec_select:V2QI
            (vec_duplicate:V16QI
              (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovsxbq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6232
;; pmovsxwd, register source: sign-extend elements 0-3 of a V8HI register
;; to V4SI.
(define_insn "sse4_1_extendv4hiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (sign_extend:V4SI
          (vec_select:V4HI
            (match_operand:V8HI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "pmovsxwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6247
;; pmovsxwd, narrow source: the register-or-memory operand is wrapped in a
;; vec_duplicate to V8HI, from which elements 0-3 are selected and
;; sign-extended to V4SI.
;;
;; Four HImode elements are consumed, so the source operand is V4HI (as in
;; the zero-extending pmovzxwd counterpart); a V2HI operand would supply
;; only two of them.
(define_insn "*sse4_1_extendv4hiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (sign_extend:V4SI
          (vec_select:V4HI
            (vec_duplicate:V8HI
              (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0)
                       (const_int 1)
                       (const_int 2)
                       (const_int 3)]))))]
  "TARGET_SSE4_1"
  "pmovsxwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6263
;; pmovsxwq, register source: sign-extend elements 0-1 of a V8HI register
;; to V2DI.
(define_insn "sse4_1_extendv2hiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (sign_extend:V2DI
          (vec_select:V2HI
            (match_operand:V8HI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovsxwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6276
;; pmovsxwq, narrow source: the register-or-memory operand is wrapped in a
;; vec_duplicate to V8HI, from which elements 0-1 are selected and
;; sign-extended to V2DI.
;;
;; Only two HImode elements are consumed, so the source operand is V2HI (as
;; in the zero-extending pmovzxwq counterpart).  A V8HI operand would make
;; the vec_duplicate:V8HI a no-op widening and would let a full 16-byte
;; memory reference match this narrow-source pattern.
(define_insn "*sse4_1_extendv2hiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (sign_extend:V2DI
          (vec_select:V2HI
            (vec_duplicate:V8HI
              (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovsxwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6290
;; pmovsxdq, register source: sign-extend elements 0-1 of a V4SI register
;; to V2DI.
(define_insn "sse4_1_extendv2siv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (sign_extend:V2DI
          (vec_select:V2SI
            (match_operand:V4SI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovsxdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6303
;; pmovsxdq, narrow source: the V2SI register-or-memory operand is wrapped
;; in a vec_duplicate to V4SI, from which elements 0-1 are selected and
;; sign-extended to V2DI.
(define_insn "*sse4_1_extendv2siv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (sign_extend:V2DI
          (vec_select:V2SI
            (vec_duplicate:V4SI
              (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovsxdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6317
;; pmovzxbw, register source: zero-extend elements 0-7 of a V16QI register
;; to V8HI.
(define_insn "sse4_1_zero_extendv8qiv8hi2"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (zero_extend:V8HI
          (vec_select:V8QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1"
  "pmovzxbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6336
;; pmovzxbw, narrow source: the V8QI register-or-memory operand is wrapped
;; in a vec_duplicate to V16QI, from which elements 0-7 are selected and
;; zero-extended to V8HI.
(define_insn "*sse4_1_zero_extendv8qiv8hi2"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (zero_extend:V8HI
          (vec_select:V8QI
            (vec_duplicate:V16QI
              (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1"
  "pmovzxbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6356
;; pmovzxbd, register source: zero-extend elements 0-3 of a V16QI register
;; to V4SI.
(define_insn "sse4_1_zero_extendv4qiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (zero_extend:V4SI
          (vec_select:V4QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "pmovzxbd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6371
;; pmovzxbd, narrow source: the V4QI register-or-memory operand is wrapped
;; in a vec_duplicate to V16QI, from which elements 0-3 are selected and
;; zero-extended to V4SI.
(define_insn "*sse4_1_zero_extendv4qiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (zero_extend:V4SI
          (vec_select:V4QI
            (vec_duplicate:V16QI
              (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "pmovzxbd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6387
;; pmovzxbq, register source: zero-extend elements 0-1 of a V16QI register
;; to V2DI.
(define_insn "sse4_1_zero_extendv2qiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (zero_extend:V2DI
          (vec_select:V2QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovzxbq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6400
;; pmovzxbq, narrow source: the V2QI register-or-memory operand is wrapped
;; in a vec_duplicate to V16QI, from which elements 0-1 are selected and
;; zero-extended to V2DI.
(define_insn "*sse4_1_zero_extendv2qiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (zero_extend:V2DI
          (vec_select:V2QI
            (vec_duplicate:V16QI
              (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovzxbq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6414
;; pmovzxwd, register source: zero-extend elements 0-3 of a V8HI register
;; to V4SI.
(define_insn "sse4_1_zero_extendv4hiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (zero_extend:V4SI
          (vec_select:V4HI
            (match_operand:V8HI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "pmovzxwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6429
;; pmovzxwd, narrow source: the V4HI register-or-memory operand is wrapped
;; in a vec_duplicate to V8HI, from which elements 0-3 are selected and
;; zero-extended to V4SI.
(define_insn "*sse4_1_zero_extendv4hiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (zero_extend:V4SI
          (vec_select:V4HI
            (vec_duplicate:V8HI
              (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "pmovzxwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6445
;; pmovzxwq, register source: zero-extend elements 0-1 of a V8HI register
;; to V2DI.
(define_insn "sse4_1_zero_extendv2hiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (zero_extend:V2DI
          (vec_select:V2HI
            (match_operand:V8HI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovzxwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6458
;; pmovzxwq, narrow source: the V2HI register-or-memory operand is wrapped
;; in a vec_duplicate to V8HI, from which elements 0-1 are selected and
;; zero-extended to V2DI.
(define_insn "*sse4_1_zero_extendv2hiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (zero_extend:V2DI
          (vec_select:V2HI
            (vec_duplicate:V8HI
              (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovzxwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6472
;; pmovzxdq, register source: zero-extend elements 0-1 of a V4SI register
;; to V2DI.
(define_insn "sse4_1_zero_extendv2siv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (zero_extend:V2DI
          (vec_select:V2SI
            (match_operand:V4SI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovzxdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6485
;; pmovzxdq, narrow source: the V2SI register-or-memory operand is wrapped
;; in a vec_duplicate to V4SI, from which elements 0-1 are selected and
;; zero-extended to V2DI.
(define_insn "*sse4_1_zero_extendv2siv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (zero_extend:V2DI
          (vec_select:V2SI
            (vec_duplicate:V4SI
              (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "pmovzxdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6499
;; ptest resembles comiss and ucomiss in that its only result is FLAGS_REG,
;; but it is not really a compare instruction, so it is modelled as an
;; opaque UNSPEC_PTEST of its two inputs rather than as a compare.
(define_insn "sse4_1_ptest"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V2DI 0 "register_operand" "x")
           (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
          UNSPEC_PTEST))]
  "TARGET_SSE4_1"
  "ptest\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6512
;; roundpd: UNSPEC_ROUND of a V2DF register-or-memory input and an
;; immediate (operand 2, range 0-15).  The source is not tied to the
;; destination.
(define_insn "sse4_1_roundpd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (unspec:V2DF
          [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
           (match_operand:SI 2 "const_0_to_15_operand" "n")]
          UNSPEC_ROUND))]
  "TARGET_SSE4_1"
  "roundpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V2DF")])
6523
;; roundps: UNSPEC_ROUND of a V4SF register-or-memory input and an
;; immediate (operand 2, range 0-15).  The source is not tied to the
;; destination.
(define_insn "sse4_1_roundps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
           (match_operand:SI 2 "const_0_to_15_operand" "n")]
          UNSPEC_ROUND))]
  "TARGET_SSE4_1"
  "roundps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V4SF")])
6534
;; roundsd: the UNSPEC_ROUND result of operand 2 is merged into the
;; destination-tied operand 1 under mask (const_int 1), i.e. only element 0
;; is taken from the rounded value.
(define_insn "sse4_1_roundsd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (unspec:V2DF
            [(match_operand:V2DF 2 "register_operand" "x")
             (match_operand:SI 3 "const_0_to_15_operand" "n")]
            UNSPEC_ROUND)
          (match_operand:V2DF 1 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE4_1"
  "roundsd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V2DF")])
6548
;; roundss: the UNSPEC_ROUND result of operand 2 is merged into the
;; destination-tied operand 1 under mask (const_int 1), i.e. only element 0
;; is taken from the rounded value.
(define_insn "sse4_1_roundss"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (unspec:V4SF
            [(match_operand:V4SF 2 "register_operand" "x")
             (match_operand:SI 3 "const_0_to_15_operand" "n")]
            UNSPEC_ROUND)
          (match_operand:V4SF 1 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE4_1"
  "roundss\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "V4SF")])
6562
6563 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6564 ;;
6565 ;; Intel SSE4.2 string/text processing instructions
6566 ;;
6567 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6568
;; Combined pattern for the explicit-length string compare.  It describes
;; all three results of UNSPEC_PCMPESTR at once:
;;   operand 0: SI index result (c constraint).
;;   operand 1: V16QI mask result (Y0 constraint).
;;   FLAGS_REG: condition-code result.
;; Inputs:
;;   operand 2: first V16QI string, SSE register.
;;   operand 3: SI length paired with operand 2 (a constraint).
;;   operand 4: second V16QI string, SSE register or memory.
;;   operand 5: SI length paired with operand 4 (d constraint).
;;   operand 6: SI immediate restricted to 0..255 by its predicate.
;;
;; The insn condition only holds before reload and the output template is
;; the always-split marker, so this pattern never reaches final output.
;; The splitter looks at the REG_UNUSED notes on the insn and emits only
;; the sse4_2_pcmpestri / sse4_2_pcmpestrm / sse4_2_pcmpestr_cconly insns
;; whose results are still needed.
6569 (define_insn_and_split "sse4_2_pcmpestr"
6570 [(set (match_operand:SI 0 "register_operand" "=c,c")
6571 (unspec:SI
6572 [(match_operand:V16QI 2 "register_operand" "x,x")
6573 (match_operand:SI 3 "register_operand" "a,a")
6574 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
6575 (match_operand:SI 5 "register_operand" "d,d")
6576 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
6577 UNSPEC_PCMPESTR))
6578 (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
6579 (unspec:V16QI
6580 [(match_dup 2)
6581 (match_dup 3)
6582 (match_dup 4)
6583 (match_dup 5)
6584 (match_dup 6)]
6585 UNSPEC_PCMPESTR))
6586 (set (reg:CC FLAGS_REG)
6587 (unspec:CC
6588 [(match_dup 2)
6589 (match_dup 3)
6590 (match_dup 4)
6591 (match_dup 5)
6592 (match_dup 6)]
6593 UNSPEC_PCMPESTR))]
6594 "TARGET_SSE4_2
6595 && !(reload_completed || reload_in_progress)"
6596 "#"
6597 "&& 1"
6598 [(const_int 0)]
6599 {
/* Each result counts as live unless the insn carries a REG_UNUSED note
   for the register that holds it.  */
6600 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6601 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6602 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6603
/* The index and mask insns also set FLAGS_REG, so the flags-only insn
   is needed only when neither of them is emitted.  */
6604 if (ecx)
6605 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
6606 operands[3], operands[4],
6607 operands[5], operands[6]));
6608 if (xmm0)
6609 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
6610 operands[3], operands[4],
6611 operands[5], operands[6]));
6612 if (flags && !(ecx || xmm0))
6613 emit_insn (gen_sse4_2_pcmpestr_cconly (operands[2], operands[3],
6614 operands[4], operands[5],
6615 operands[6]));
/* When no result is live nothing has been emitted above.  An empty
   sequence is indistinguishable from a failed split and would leave
   this pre-reload-only insn in the stream, so emit a deleted-insn
   note to make the replacement sequence non-empty.  */
if (!(flags || ecx || xmm0))
  emit_note (NOTE_INSN_DELETED);
6616 DONE;
6617 }
6618 [(set_attr "type" "sselog")
6619 (set_attr "prefix_data16" "1")
6620 (set_attr "prefix_extra" "1")
6621 (set_attr "memory" "none,load")
6622 (set_attr "mode" "TI")])
6623
;; Index form of the explicit-length string compare: sets the SI result
;; in operand 0 (c constraint) and FLAGS_REG, both as UNSPEC_PCMPESTR of
;; the same five inputs.
;;   operand 1: first V16QI string, SSE register.
;;   operand 2: SI length paired with operand 1 (a constraint).
;;   operand 3: second V16QI string, SSE register or memory.
;;   operand 4: SI length paired with operand 3 (d constraint).
;;   operand 5: SI immediate restricted to 0..255 by its predicate.
;; Only operands 1, 3 and 5 are printed; operands 0, 2 and 4 are fixed by
;; their single-register constraints and do not appear in the template.
6624 (define_insn "sse4_2_pcmpestri"
6625 [(set (match_operand:SI 0 "register_operand" "=c,c")
6626 (unspec:SI
6627 [(match_operand:V16QI 1 "register_operand" "x,x")
6628 (match_operand:SI 2 "register_operand" "a,a")
6629 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6630 (match_operand:SI 4 "register_operand" "d,d")
6631 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6632 UNSPEC_PCMPESTR))
6633 (set (reg:CC FLAGS_REG)
6634 (unspec:CC
6635 [(match_dup 1)
6636 (match_dup 2)
6637 (match_dup 3)
6638 (match_dup 4)
6639 (match_dup 5)]
6640 UNSPEC_PCMPESTR))]
6641 "TARGET_SSE4_2"
6642 "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
6643 [(set_attr "type" "sselog")
6644 (set_attr "prefix_data16" "1")
6645 (set_attr "prefix_extra" "1")
6646 (set_attr "memory" "none,load")
6647 (set_attr "mode" "TI")])
6648
;; Mask form of the explicit-length string compare: sets the V16QI result
;; in operand 0 (Y0 constraint) and FLAGS_REG, both as UNSPEC_PCMPESTR of
;; the same five inputs.
;;   operand 1: first V16QI string, SSE register.
;;   operand 2: SI length paired with operand 1 (a constraint).
;;   operand 3: second V16QI string, SSE register or memory.
;;   operand 4: SI length paired with operand 3 (d constraint).
;;   operand 5: SI immediate restricted to 0..255 by its predicate.
;; Only operands 1, 3 and 5 are printed; operands 0, 2 and 4 are fixed by
;; their constraints and do not appear in the template.
6649 (define_insn "sse4_2_pcmpestrm"
6650 [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
6651 (unspec:V16QI
6652 [(match_operand:V16QI 1 "register_operand" "x,x")
6653 (match_operand:SI 2 "register_operand" "a,a")
6654 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6655 (match_operand:SI 4 "register_operand" "d,d")
6656 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
6657 UNSPEC_PCMPESTR))
6658 (set (reg:CC FLAGS_REG)
6659 (unspec:CC
6660 [(match_dup 1)
6661 (match_dup 2)
6662 (match_dup 3)
6663 (match_dup 4)
6664 (match_dup 5)]
6665 UNSPEC_PCMPESTR))]
6666 "TARGET_SSE4_2"
6667 "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
6668 [(set_attr "type" "sselog")
6669 (set_attr "prefix_data16" "1")
6670 (set_attr "prefix_extra" "1")
6671 (set_attr "memory" "none,load")
6672 (set_attr "mode" "TI")])
6673
;; Flags-only form of the explicit-length string compare: the only value
;; set is FLAGS_REG.  The register result of the emitted instruction is
;; described as a clobbered scratch, giving four alternatives:
;;   alternatives 0 and 1: emit pcmpestrm and clobber the V16QI scratch
;;                         (Y0 constraint); the SI scratch is X, i.e. not
;;                         needed.
;;   alternatives 2 and 3: emit pcmpestri and clobber the SI scratch
;;                         (c constraint); the V16QI scratch is X.
;; Within each pair the second V16QI input (operand 2) is a register in
;; the first alternative and memory in the second, matching the memory
;; attribute below.
;;   operand 0: first V16QI string, SSE register.
;;   operand 1: SI length paired with operand 0 (a constraint).
;;   operand 3: SI length paired with operand 2 (d constraint).
;;   operand 4: SI immediate restricted to 0..255 by its predicate.
6674 (define_insn "sse4_2_pcmpestr_cconly"
6675 [(set (reg:CC FLAGS_REG)
6676 (unspec:CC
6677 [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
6678 (match_operand:SI 1 "register_operand" "a,a,a,a")
6679 (match_operand:V16QI 2 "nonimmediate_operand" "x,m,x,m")
6680 (match_operand:SI 3 "register_operand" "d,d,d,d")
6681 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
6682 UNSPEC_PCMPESTR))
6683 (clobber (match_scratch:V16QI 5 "=Y0,Y0,X,X"))
6684 (clobber (match_scratch:SI 6 "= X, X,c,c"))]
6685 "TARGET_SSE4_2"
6686 "@
6687 pcmpestrm\t{%4, %2, %0|%0, %2, %4}
6688 pcmpestrm\t{%4, %2, %0|%0, %2, %4}
6689 pcmpestri\t{%4, %2, %0|%0, %2, %4}
6690 pcmpestri\t{%4, %2, %0|%0, %2, %4}"
6691 [(set_attr "type" "sselog")
6692 (set_attr "prefix_data16" "1")
6693 (set_attr "prefix_extra" "1")
6694 (set_attr "memory" "none,load,none,load")
6695 (set_attr "mode" "TI")])
6696
;; Combined pattern for the implicit-length string compare.  It describes
;; all three results of UNSPEC_PCMPISTR at once:
;;   operand 0: SI index result (c constraint).
;;   operand 1: V16QI mask result (Y0 constraint).
;;   FLAGS_REG: condition-code result.
;; Inputs:
;;   operand 2: first V16QI string, SSE register.
;;   operand 3: second V16QI string, SSE register or memory.
;;   operand 4: SI immediate restricted to 0..255 by its predicate.
;;
;; The insn condition only holds before reload and the output template is
;; the always-split marker, so this pattern never reaches final output.
;; The splitter looks at the REG_UNUSED notes on the insn and emits only
;; the sse4_2_pcmpistri / sse4_2_pcmpistrm / sse4_2_pcmpistr_cconly insns
;; whose results are still needed.
6697 (define_insn_and_split "sse4_2_pcmpistr"
6698 [(set (match_operand:SI 0 "register_operand" "=c,c")
6699 (unspec:SI
6700 [(match_operand:V16QI 2 "register_operand" "x,x")
6701 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
6702 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
6703 UNSPEC_PCMPISTR))
6704 (set (match_operand:V16QI 1 "register_operand" "=Y0,Y0")
6705 (unspec:V16QI
6706 [(match_dup 2)
6707 (match_dup 3)
6708 (match_dup 4)]
6709 UNSPEC_PCMPISTR))
6710 (set (reg:CC FLAGS_REG)
6711 (unspec:CC
6712 [(match_dup 2)
6713 (match_dup 3)
6714 (match_dup 4)]
6715 UNSPEC_PCMPISTR))]
6716 "TARGET_SSE4_2
6717 && !(reload_completed || reload_in_progress)"
6718 "#"
6719 "&& 1"
6720 [(const_int 0)]
6721 {
/* Each result counts as live unless the insn carries a REG_UNUSED note
   for the register that holds it.  */
6722 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
6723 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
6724 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
6725
/* The index and mask insns also set FLAGS_REG, so the flags-only insn
   is needed only when neither of them is emitted.  */
6726 if (ecx)
6727 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
6728 operands[3], operands[4]));
6729 if (xmm0)
6730 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
6731 operands[3], operands[4]));
6732 if (flags && !(ecx || xmm0))
6733 emit_insn (gen_sse4_2_pcmpistr_cconly (operands[2], operands[3],
6734 operands[4]));
/* When no result is live nothing has been emitted above.  An empty
   sequence is indistinguishable from a failed split and would leave
   this pre-reload-only insn in the stream, so emit a deleted-insn
   note to make the replacement sequence non-empty.  */
if (!(flags || ecx || xmm0))
  emit_note (NOTE_INSN_DELETED);
6735 DONE;
6736 }
6737 [(set_attr "type" "sselog")
6738 (set_attr "prefix_data16" "1")
6739 (set_attr "prefix_extra" "1")
6740 (set_attr "memory" "none,load")
6741 (set_attr "mode" "TI")])
6742
;; Index form of the implicit-length string compare: sets the SI result
;; in operand 0 (c constraint) and FLAGS_REG, both as UNSPEC_PCMPISTR of
;; the same three inputs.
;;   operand 1: first V16QI string, SSE register.
;;   operand 2: second V16QI string, SSE register or memory.
;;   operand 3: SI immediate restricted to 0..255 by its predicate.
;; Operand 0 is fixed by its constraint and is not printed.
6743 (define_insn "sse4_2_pcmpistri"
6744 [(set (match_operand:SI 0 "register_operand" "=c,c")
6745 (unspec:SI
6746 [(match_operand:V16QI 1 "register_operand" "x,x")
6747 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6748 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6749 UNSPEC_PCMPISTR))
6750 (set (reg:CC FLAGS_REG)
6751 (unspec:CC
6752 [(match_dup 1)
6753 (match_dup 2)
6754 (match_dup 3)]
6755 UNSPEC_PCMPISTR))]
6756 "TARGET_SSE4_2"
6757 "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
6758 [(set_attr "type" "sselog")
6759 (set_attr "prefix_data16" "1")
6760 (set_attr "prefix_extra" "1")
6761 (set_attr "memory" "none,load")
6762 (set_attr "mode" "TI")])
6763
;; Mask form of the implicit-length string compare: sets the V16QI result
;; in operand 0 (Y0 constraint) and FLAGS_REG, both as UNSPEC_PCMPISTR of
;; the same three inputs.
;;   operand 1: first V16QI string, SSE register.
;;   operand 2: second V16QI string, SSE register or memory.
;;   operand 3: SI immediate restricted to 0..255 by its predicate.
;; Operand 0 is fixed by its constraint and is not printed.
6764 (define_insn "sse4_2_pcmpistrm"
6765 [(set (match_operand:V16QI 0 "register_operand" "=Y0,Y0")
6766 (unspec:V16QI
6767 [(match_operand:V16QI 1 "register_operand" "x,x")
6768 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
6769 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6770 UNSPEC_PCMPISTR))
6771 (set (reg:CC FLAGS_REG)
6772 (unspec:CC
6773 [(match_dup 1)
6774 (match_dup 2)
6775 (match_dup 3)]
6776 UNSPEC_PCMPISTR))]
6777 "TARGET_SSE4_2"
6778 "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
6779 [(set_attr "type" "sselog")
6780 (set_attr "prefix_data16" "1")
6781 (set_attr "prefix_extra" "1")
6782 (set_attr "memory" "none,load")
6783 (set_attr "mode" "TI")])
6784
;; Flags-only form of the implicit-length string compare: the only value
;; set is FLAGS_REG.  The register result of the emitted instruction is
;; described as a clobbered scratch, giving four alternatives:
;;   alternatives 0 and 1: emit pcmpistrm and clobber the V16QI scratch
;;                         (Y0 constraint); the SI scratch is X, i.e. not
;;                         needed.
;;   alternatives 2 and 3: emit pcmpistri and clobber the SI scratch
;;                         (c constraint); the V16QI scratch is X.
;; Within each pair the second V16QI input (operand 1) is a register in
;; the first alternative and memory in the second, matching the memory
;; attribute below.
;;   operand 0: first V16QI string, SSE register.
;;   operand 2: SI immediate restricted to 0..255 by its predicate.
6785 (define_insn "sse4_2_pcmpistr_cconly"
6786 [(set (reg:CC FLAGS_REG)
6787 (unspec:CC
6788 [(match_operand:V16QI 0 "register_operand" "x,x,x,x")
6789 (match_operand:V16QI 1 "nonimmediate_operand" "x,m,x,m")
6790 (match_operand:SI 2 "const_0_to_255_operand" "n,n,n,n")]
6791 UNSPEC_PCMPISTR))
6792 (clobber (match_scratch:V16QI 3 "=Y0,Y0,X,X"))
6793 (clobber (match_scratch:SI 4 "= X, X,c,c"))]
6794 "TARGET_SSE4_2"
6795 "@
6796 pcmpistrm\t{%2, %1, %0|%0, %1, %2}
6797 pcmpistrm\t{%2, %1, %0|%0, %1, %2}
6798 pcmpistri\t{%2, %1, %0|%0, %1, %2}
6799 pcmpistri\t{%2, %1, %0|%0, %1, %2}"
6800 [(set_attr "type" "sselog")
6801 (set_attr "prefix_data16" "1")
6802 (set_attr "prefix_extra" "1")
6803 (set_attr "memory" "none,load,none,load")
6804 (set_attr "mode" "TI")])