i386.c (avx_vpermilp_parallel): New function.
[gcc.git] / gcc / config / i386 / sse.md
1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009
3 ;; Free Software Foundation, Inc.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
;; Mode iterators.  A pattern written with one of these names in place of
;; a machine mode is instantiated once for every mode in the list.
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
25
26 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
28
29 ;; 32 byte integral vector modes handled by AVX
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
31
32 ;; All 32-byte vector modes handled by AVX
33 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
34
35 ;; All QI vector modes handled by AVX
36 (define_mode_iterator AVXMODEQI [V32QI V16QI])
37
38 ;; All DI vector modes handled by AVX
39 (define_mode_iterator AVXMODEDI [V4DI V2DI])
40
41 ;; All vector modes handled by AVX
;; (the six 16-byte modes of SSEMODE followed by the six 32-byte modes
;; of AVX256MODE).
42 (define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
43
44 ;; Mix-n-match
;; Subsets of the 16-byte integer modes.  In the SSEMODE<digits> names
;; each digit is an element size in bytes: 1 = QI, 2 = HI, 4 = SI, 8 = DI.
45 (define_mode_iterator SSEMODE12 [V16QI V8HI])
46 (define_mode_iterator SSEMODE24 [V8HI V4SI])
47 (define_mode_iterator SSEMODE14 [V16QI V4SI])
48 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
49 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
50 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Floating-point subsets: SSEMODEF4 is the scalar SF/DF modes plus the
;; packed 16-byte modes of SSEMODEF2P; FMA4MODEF4 is the two 32-byte
;; float modes.
51 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
52 (define_mode_iterator FMA4MODEF4 [V8SF V4DF])
53 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
54
;; AVX subsets.  The AVX256* iterators list only 32-byte modes; the
;; AVXMODE* iterators mix 16-byte and 32-byte modes (FDP = double
;; precision only, FSP = single precision only).
55 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
56 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
57 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
58 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
59 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
60 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
61 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
62 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
;; NOTE(review): presumably the result modes of the cvtdq2ps and
;; cvtps2dq conversion patterns; their users are not in this part of
;; the file -- confirm.
63 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
64 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
65
66 ;; Int-float size matches
;; Each iterator pairs the float and the integer 16-byte vector mode
;; that have the same element size (4 bytes, resp. 8 bytes).
67 (define_mode_iterator SSEMODE4S [V4SF V4SI])
68 (define_mode_iterator SSEMODE2D [V2DF V2DI])
69
70 ;; Modes handled by integer vcond pattern
;; V2DI is only part of the iteration when TARGET_SSE4_2 holds.
71 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
72 (V2DI "TARGET_SSE4_2")])
73
;; Mode attributes.  Inside an iterated pattern, <attrname> is replaced
;; by the string listed here for the mode currently being instantiated.
74 ;; Mapping from float mode to required SSE level
75 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
76
77 ;; Mapping from integer vector mode to mnemonic suffix
78 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
79
80 ;; Mapping of the fma4 suffix
81 (define_mode_attr fma4modesuffixf4 [(V8SF "ps") (V4DF "pd")])
;; Scalar-operation suffix: a scalar mode and the 16-byte vector mode
;; built from it both map to the same "ss"/"sd" suffix.
82 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
83 (V4SF "ss") (V2DF "sd")])
84
85 ;; Mapping of the avx suffix
;; Scalar modes get the scalar suffix ("ss"/"sd"), vector modes the
;; packed suffix ("ps"/"pd").
86 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
87 (V4SF "ps") (V2DF "pd")])
88
;; One-letter precision suffix ("s" single, "d" double); the patterns
;; below append it to stems such as "movup", "movntp", "divp" or "sqrts".
89 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
90
91 ;; Mapping of the max integer size for xop rotate immediate constraint
;; (element width in bits minus one).
92 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
93
94 ;; Mapping of vector modes back to the scalar modes
95 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
96 (V16QI "QI") (V8HI "HI")
97 (V4SI "SI") (V2DI "DI")])
98
99 ;; Mapping of vector modes to a vector mode of double size
;; (same element mode, twice as many elements).
100 (define_mode_attr ssedoublesizemode
101 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
102 (V8HI "V16HI") (V16QI "V32QI")
103 (V4DF "V8DF") (V8SF "V16SF")
104 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
105
106 ;; Number of scalar elements in each vector type
107 (define_mode_attr ssescalarnum
108 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
109 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
110
111 ;; Mapping for AVX
;; avxvecmode: value used for the "mode" insn attribute.  Integer vector
;; modes collapse to TI (16 bytes) or OI (32 bytes); float vector modes
;; map to themselves.
112 (define_mode_attr avxvecmode
113 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V4SF "V4SF")
114 (V2DF "V2DF") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
115 (V8SF "V8SF") (V4DF "V4DF")])
;; avxvecpsmode: integer vector mode -> single-precision float vector
;; mode of the same total size.
116 (define_mode_attr avxvecpsmode
117 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
118 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; avxhalfvecmode: same element mode, half as many elements.
119 (define_mode_attr avxhalfvecmode
120 [(V4SF "V2SF") (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI")
121 (V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")])
;; avxscalarmode: vector mode -> mode of one element.
122 (define_mode_attr avxscalarmode
123 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V4SF "SF") (V2DF "DF")
124 (V8SF "SF") (V4DF "DF")])
;; avxcvtvecmode: SI <-> SF vector counterpart with the same element
;; count (the mapping goes in both directions).
125 (define_mode_attr avxcvtvecmode
126 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; avxpermvecmode: float vector mode -> integer vector mode with the
;; same element count and element size.
127 (define_mode_attr avxpermvecmode
128 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; avxmodesuffixf2c: one-letter precision suffix ("s" single, "d"
;; double) for both vector widths.
129 (define_mode_attr avxmodesuffixf2c
130 [(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")])
;; avxmodesuffixp: two-letter suffix: "ps", "pd", or "si" for the SI
;; integer vectors.
131 (define_mode_attr avxmodesuffixp
132 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
133 (V4DF "pd")])
;; avxmodesuffix: empty for 16-byte modes, "256" for 32-byte modes; it
;; appears in pattern names such as avx_movdqu<avxmodesuffix>.
134 (define_mode_attr avxmodesuffix
135 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
136 (V8SI "256") (V8SF "256") (V4DF "256")])
137
138 ;; Mapping of immediate bits for blend instructions
;; Each value is 2^N - 1 for a vector of N elements, i.e. an all-ones
;; mask with one bit per element.
139 (define_mode_attr blendbits
140 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
141
142 ;; Mapping of immediate bits for pinsr instructions
;; Each value is 1 << (N - 1) for a vector of N elements, i.e. the
;; single bit that stands for the last element.
;; NOTE(review): the patterns that use this are not in this part of the
;; file -- confirm how the bound is applied.
143 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
144
145 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
146
147 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
148 ;;
149 ;; Move patterns
150 ;;
151 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
152
;; Move expander for every 32-byte AVX vector mode (requires TARGET_AVX).
;; The RTL template is never emitted as written: the preparation code
;; hands the operands to ix86_expand_vector_move and finishes with DONE.
153 (define_expand "mov<mode>"
154 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
155 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
156 "TARGET_AVX"
157 {
158 ix86_expand_vector_move (<MODE>mode, operands);
159 DONE;
160 })
161
;; AVX move insn for all AVXMODE vector modes.  At least one operand
;; must be a register, so memory-to-memory moves never match.
;;   alternative 0: register <- constant accepted by constraint "C";
;;                  the opcode comes from standard_sse_constant_opcode.
;;   alternative 1: register <- register or memory.
;;   alternative 2: memory   <- register.
;; For alternatives 1 and 2 the mnemonic follows the insn "mode"
;; attribute: vmovaps for V8SF/V4SF, vmovapd for V4DF/V2DF, vmovdqa for
;; everything else.
162 (define_insn "*avx_mov<mode>_internal"
163 [(set (match_operand:AVXMODE 0 "nonimmediate_operand" "=x,x ,m")
164 (match_operand:AVXMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
165 "TARGET_AVX
166 && (register_operand (operands[0], <MODE>mode)
167 || register_operand (operands[1], <MODE>mode))"
168 {
169 switch (which_alternative)
170 {
171 case 0:
172 return standard_sse_constant_opcode (insn, operands[1]);
173 case 1:
174 case 2:
175 switch (get_attr_mode (insn))
176 {
177 case MODE_V8SF:
178 case MODE_V4SF:
179 return "vmovaps\t{%1, %0|%0, %1}";
180 case MODE_V4DF:
181 case MODE_V2DF:
182 return "vmovapd\t{%1, %0|%0, %1}";
183 default:
184 return "vmovdqa\t{%1, %0|%0, %1}";
185 }
186 default:
187 gcc_unreachable ();
188 }
189 }
190 [(set_attr "type" "sselog1,ssemov,ssemov")
191 (set_attr "prefix" "vex")
192 (set_attr "mode" "<avxvecmode>")])
193
194 ;; All of these patterns are enabled for SSE1 as well as SSE2.
195 ;; This is essential for maintaining stable calling conventions.
196
;; Move expander for every 16-byte SSE vector mode.  Enabled on plain
;; TARGET_SSE; the work is delegated to ix86_expand_vector_move and the
;; template itself is never emitted (DONE).
197 (define_expand "mov<mode>"
198 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
199 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
200 "TARGET_SSE"
201 {
202 ix86_expand_vector_move (<MODE>mode, operands);
203 DONE;
204 })
205
;; SSE move insn for all 16-byte vector modes.  At least one operand
;; must be a register.  Alternative 0 loads a "C" constant through
;; standard_sse_constant_opcode; alternatives 1 (load or reg-reg) and 2
;; (store) emit movaps, movapd or movdqa according to the insn "mode"
;; attribute.
;;
;; The "mode" attribute is computed, not fixed:
;;   - V4SF when optimizing the function for size, when SSE2 is not
;;     available, or for the store alternative when
;;     TARGET_SSE_TYPELESS_STORES is set;
;;   - otherwise V4SF for V4SFmode and V2DF for V2DFmode;
;;   - otherwise TI.
206 (define_insn "*mov<mode>_internal"
207 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
208 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
209 "TARGET_SSE
210 && (register_operand (operands[0], <MODE>mode)
211 || register_operand (operands[1], <MODE>mode))"
212 {
213 switch (which_alternative)
214 {
215 case 0:
216 return standard_sse_constant_opcode (insn, operands[1]);
217 case 1:
218 case 2:
219 switch (get_attr_mode (insn))
220 {
221 case MODE_V4SF:
222 return "movaps\t{%1, %0|%0, %1}";
223 case MODE_V2DF:
224 return "movapd\t{%1, %0|%0, %1}";
225 default:
226 return "movdqa\t{%1, %0|%0, %1}";
227 }
228 default:
229 gcc_unreachable ();
230 }
231 }
232 [(set_attr "type" "sselog1,ssemov,ssemov")
233 (set (attr "mode")
234 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
235 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
236 (and (eq_attr "alternative" "2")
237 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
238 (const_int 0))))
239 (const_string "V4SF")
240 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
241 (const_string "V4SF")
242 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
243 (const_string "V2DF")
244 ]
245 (const_string "TI")))])
246
247 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
248 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
249 ;; from memory, we'd prefer to load the memory directly into the %xmm
250 ;; register. To facilitate this happy circumstance, this pattern won't
251 ;; split until after register allocation. If the 64-bit value didn't
252 ;; come from memory, this is the best we can do. This is much better
253 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
254 ;; from there.
255
;; Insn-and-split: operand 0 (V4SI register) receives DImode operand 1
;; viewed as the low part of a V4SI.  Only for 32-bit targets with SSE2
;; and TARGET_INTER_UNIT_MOVES.  The output template is "#", so the
;; insn is always split, and only once reload_completed holds:
;;   - operand 1 in integer registers: each 32-bit half (subreg bytes 0
;;     and 4) is loaded with sse2_loadld, the second one into the
;;     early-clobbered scratch operand 2, then the two are interleaved
;;     with sse2_punpckldq;
;;   - operand 1 in memory: a single vec_concatv2di of the DI value
;;     with zero; the scratch is unused (constraint "X").
256 (define_insn_and_split "movdi_to_sse"
257 [(parallel
258 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
259 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
260 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
261 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
262 "#"
263 "&& reload_completed"
264 [(const_int 0)]
265 {
266 if (register_operand (operands[1], DImode))
267 {
268 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
269 Assemble the 64-bit DImode value in an xmm register. */
270 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
271 gen_rtx_SUBREG (SImode, operands[1], 0)));
272 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
273 gen_rtx_SUBREG (SImode, operands[1], 4)));
274 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
275 }
276 else if (memory_operand (operands[1], DImode))
277 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
278 else
279 gcc_unreachable ();
280 })
281
;; After reload, rewrite a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to its
;; SFmode subreg at byte 0 and operand 2 becomes the V4SF zero vector;
;; the replacement takes element 0 from the duplicated scalar
;; (vec_merge mask 1) and all other elements from zero.
282 (define_split
283 [(set (match_operand:V4SF 0 "register_operand" "")
284 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
285 "TARGET_SSE && reload_completed"
286 [(set (match_dup 0)
287 (vec_merge:V4SF
288 (vec_duplicate:V4SF (match_dup 1))
289 (match_dup 2)
290 (const_int 1)))]
291 {
292 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
293 operands[2] = CONST0_RTX (V4SFmode);
294 })
295
;; V2DF counterpart of the split above (needs TARGET_SSE2).  Operand 1
;; is narrowed to its DFmode subreg at byte 0 and operand 2 becomes a
;; DFmode zero; the replacement concatenates the scalar with that zero.
296 (define_split
297 [(set (match_operand:V2DF 0 "register_operand" "")
298 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
299 "TARGET_SSE2 && reload_completed"
300 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
301 {
302 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
303 operands[2] = CONST0_RTX (DFmode);
304 })
305
;; Push expander for a 32-byte AVX vector register.  Everything is done
;; by ix86_expand_push; the template is not emitted (DONE).
306 (define_expand "push<mode>1"
307 [(match_operand:AVX256MODE 0 "register_operand" "")]
308 "TARGET_AVX"
309 {
310 ix86_expand_push (<MODE>mode, operands[0]);
311 DONE;
312 })
313
;; Push expander for a 16-byte SSE vector register.  Everything is done
;; by ix86_expand_push; the template is not emitted (DONE).
314 (define_expand "push<mode>1"
315 [(match_operand:SSEMODE 0 "register_operand" "")]
316 "TARGET_SSE"
317 {
318 ix86_expand_push (<MODE>mode, operands[0]);
319 DONE;
320 })
321
;; Misaligned-move expander for the 32-byte AVX vector modes; delegates
;; to ix86_expand_vector_move_misalign and finishes with DONE.
322 (define_expand "movmisalign<mode>"
323 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
324 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
325 "TARGET_AVX"
326 {
327 ix86_expand_vector_move_misalign (<MODE>mode, operands);
328 DONE;
329 })
330
;; Misaligned-move expander for the 16-byte SSE vector modes; delegates
;; to ix86_expand_vector_move_misalign and finishes with DONE.
331 (define_expand "movmisalign<mode>"
332 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
333 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
334 "TARGET_SSE"
335 {
336 ix86_expand_vector_move_misalign (<MODE>mode, operands);
337 DONE;
338 })
339
;; AVX unaligned float vector move (vmovups / vmovupd), modelled as
;; UNSPEC_MOVU so it stays distinct from an ordinary move.  Either a
;; load (or reg-reg copy) or a store; memory-to-memory is rejected.
;; The name expands to e.g. avx_movups and avx_movups256.
340 (define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
341 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
342 (unspec:AVXMODEF2P
343 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
344 UNSPEC_MOVU))]
345 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
346 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
347 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
348 [(set_attr "type" "ssemov")
349 (set_attr "movu" "1")
350 (set_attr "prefix" "vex")
351 (set_attr "mode" "<MODE>")])
352
;; movq between vector operands: the result is element 0 (DImode) of
;; operand 1 in the low half and zero in the high half.
;; NOTE(review): the "%v" in the template together with prefix
;; "maybe_vex" presumably selects the VEX-encoded form when AVX is
;; enabled -- confirm against the operand-printing code.
353 (define_insn "sse2_movq128"
354 [(set (match_operand:V2DI 0 "register_operand" "=x")
355 (vec_concat:V2DI
356 (vec_select:DI
357 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
358 (parallel [(const_int 0)]))
359 (const_int 0)))]
360 "TARGET_SSE2"
361 "%vmovq\t{%1, %0|%0, %1}"
362 [(set_attr "type" "ssemov")
363 (set_attr "prefix" "maybe_vex")
364 (set_attr "mode" "TI")])
365
;; SSE unaligned float vector move (movups / movupd), modelled as
;; UNSPEC_MOVU.  Load (or reg-reg copy) or store; memory-to-memory is
;; rejected.  The name expands to sse_movups and sse2_movupd.
366 (define_insn "<sse>_movup<ssemodesuffixf2c>"
367 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
368 (unspec:SSEMODEF2P
369 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
370 UNSPEC_MOVU))]
371 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
372 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
373 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
374 [(set_attr "type" "ssemov")
375 (set_attr "movu" "1")
376 (set_attr "mode" "<MODE>")])
377
;; AVX unaligned integer vector move (vmovdqu) for V16QI and V32QI,
;; modelled as UNSPEC_MOVU.  Load (or reg-reg copy) or store;
;; memory-to-memory is rejected.
378 (define_insn "avx_movdqu<avxmodesuffix>"
379 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
380 (unspec:AVXMODEQI
381 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
382 UNSPEC_MOVU))]
383 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
384 "vmovdqu\t{%1, %0|%0, %1}"
385 [(set_attr "type" "ssemov")
386 (set_attr "movu" "1")
387 (set_attr "prefix" "vex")
388 (set_attr "mode" "<avxvecmode>")])
389
;; SSE2 unaligned integer vector move (movdqu) on V16QI, modelled as
;; UNSPEC_MOVU.  Load (or reg-reg copy) or store; memory-to-memory is
;; rejected.
390 (define_insn "sse2_movdqu"
391 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
392 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
393 UNSPEC_MOVU))]
394 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
395 "movdqu\t{%1, %0|%0, %1}"
396 [(set_attr "type" "ssemov")
397 (set_attr "movu" "1")
398 (set_attr "prefix_data16" "1")
399 (set_attr "mode" "TI")])
400
;; AVX non-temporal store of a float vector register to memory
;; (vmovntps / vmovntpd), modelled as UNSPEC_MOVNT.  Store only:
;; operand 0 must be memory and operand 1 a register.
401 (define_insn "avx_movnt<mode>"
402 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
403 (unspec:AVXMODEF2P
404 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
405 UNSPEC_MOVNT))]
406 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
407 "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
408 [(set_attr "type" "ssemov")
409 (set_attr "prefix" "vex")
410 (set_attr "mode" "<MODE>")])
411
;; SSE non-temporal store of a V4SF / V2DF register to memory
;; (movntps / movntpd), modelled as UNSPEC_MOVNT.  Store only.
412 (define_insn "<sse>_movnt<mode>"
413 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
414 (unspec:SSEMODEF2P
415 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
416 UNSPEC_MOVNT))]
417 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
418 "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
419 [(set_attr "type" "ssemov")
420 (set_attr "mode" "<MODE>")])
421
;; AVX non-temporal store of a V2DI / V4DI register to memory
;; (vmovntdq), modelled as UNSPEC_MOVNT.  Store only.
;; NOTE(review): the "type" attribute is "ssecvt" here, while
;; sse2_movntv2di uses "ssemov" for the 16-byte non-VEX store --
;; confirm the difference is intentional.
422 (define_insn "avx_movnt<mode>"
423 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
424 (unspec:AVXMODEDI
425 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
426 UNSPEC_MOVNT))]
427 "TARGET_AVX"
428 "vmovntdq\t{%1, %0|%0, %1}"
429 [(set_attr "type" "ssecvt")
430 (set_attr "prefix" "vex")
431 (set_attr "mode" "<avxvecmode>")])
432
;; SSE2 non-temporal store of a V2DI register to memory (movntdq),
;; modelled as UNSPEC_MOVNT.  Store only.
433 (define_insn "sse2_movntv2di"
434 [(set (match_operand:V2DI 0 "memory_operand" "=m")
435 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
436 UNSPEC_MOVNT))]
437 "TARGET_SSE2"
438 "movntdq\t{%1, %0|%0, %1}"
439 [(set_attr "type" "ssemov")
440 (set_attr "prefix_data16" "1")
441 (set_attr "mode" "TI")])
442
;; SSE2 non-temporal store of a 32-bit general register ("r") to
;; memory (movnti), modelled as UNSPEC_MOVNT.
;; NOTE(review): the "mode" attribute is "V2DF" although the operands
;; are SImode -- confirm this is intentional.
443 (define_insn "sse2_movntsi"
444 [(set (match_operand:SI 0 "memory_operand" "=m")
445 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
446 UNSPEC_MOVNT))]
447 "TARGET_SSE2"
448 "movnti\t{%1, %0|%0, %1}"
449 [(set_attr "type" "ssemov")
450 (set_attr "prefix_data16" "0")
451 (set_attr "mode" "V2DF")])
452
;; AVX vlddqu: load a V16QI / V32QI vector from memory into a register,
;; modelled as UNSPEC_LDDQU.  Load only (operand 1 must be memory).
453 (define_insn "avx_lddqu<avxmodesuffix>"
454 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
455 (unspec:AVXMODEQI
456 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
457 UNSPEC_LDDQU))]
458 "TARGET_AVX"
459 "vlddqu\t{%1, %0|%0, %1}"
460 [(set_attr "type" "ssecvt")
461 (set_attr "movu" "1")
462 (set_attr "prefix" "vex")
463 (set_attr "mode" "<avxvecmode>")])
464
;; SSE3 lddqu: load a V16QI vector from memory into a register,
;; modelled as UNSPEC_LDDQU.  Load only (operand 1 must be memory).
;; The attributes record a rep prefix and no data16 prefix.
465 (define_insn "sse3_lddqu"
466 [(set (match_operand:V16QI 0 "register_operand" "=x")
467 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
468 UNSPEC_LDDQU))]
469 "TARGET_SSE3"
470 "lddqu\t{%1, %0|%0, %1}"
471 [(set_attr "type" "ssemov")
472 (set_attr "movu" "1")
473 (set_attr "prefix_data16" "0")
474 (set_attr "prefix_rep" "1")
475 (set_attr "mode" "TI")])
476
477 ; Expand patterns for non-temporal stores. At the moment, only those
478 ; that directly map to insns are defined; it would be possible to
479 ; define patterns for other modes that would expand to several insns.
480
;; Non-temporal store expander for V4SF / V2DF.  The preparation
;; statement is empty, so the UNSPEC_MOVNT template is emitted exactly
;; as written; it has the same shape as the <sse>_movnt<mode> insn.
481 (define_expand "storent<mode>"
482 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
483 (unspec:SSEMODEF2P
484 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
485 UNSPEC_MOVNT))]
486 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
487 "")
488
;; Non-temporal store expander for the scalar modes of the MODEF
;; iterator, enabled only for TARGET_SSE4A.  The template is emitted as
;; written (empty preparation statement).
;; NOTE(review): MODEF and the insn that matches this RTL are defined
;; outside this part of the file -- confirm.
489 (define_expand "storent<mode>"
490 [(set (match_operand:MODEF 0 "memory_operand" "")
491 (unspec:MODEF
492 [(match_operand:MODEF 1 "register_operand" "")]
493 UNSPEC_MOVNT))]
494 "TARGET_SSE4A"
495 "")
496
;; Non-temporal store expander for V2DI.  The template is emitted as
;; written and has the same shape as the sse2_movntv2di insn.
497 (define_expand "storentv2di"
498 [(set (match_operand:V2DI 0 "memory_operand" "")
499 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
500 UNSPEC_MOVNT))]
501 "TARGET_SSE2"
502 "")
503
;; Non-temporal store expander for SImode.  The template is emitted as
;; written and has the same shape as the sse2_movntsi insn.
504 (define_expand "storentsi"
505 [(set (match_operand:SI 0 "memory_operand" "")
506 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
507 UNSPEC_MOVNT))]
508 "TARGET_SSE2"
509 "")
510
511 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
512 ;;
513 ;; Parallel floating point arithmetic
514 ;;
515 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
516
;; Expander for the unary codes of the absneg code iterator on V4SF /
;; V2DF.  All work is done by ix86_expand_fp_absneg_operator; the
;; template is not emitted (DONE).
;; NOTE(review): absneg is defined outside this part of the file;
;; presumably it iterates over abs and neg -- confirm.
517 (define_expand "<code><mode>2"
518 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
519 (absneg:SSEMODEF2P
520 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
521 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
522 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
523
;; Add/subtract expander for the 32-byte float modes V8SF / V4DF.  The
;; operands are first passed through
;; ix86_fixup_binary_operands_no_copy; there is no DONE, so the
;; template is then emitted with the (possibly adjusted) operands.
;; NOTE(review): plusminus and <plusminus_insn> are defined outside
;; this part of the file -- confirm the generated names.
524 (define_expand "<plusminus_insn><mode>3"
525 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
526 (plusminus:AVX256MODEF2P
527 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
528 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
529 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
530 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
531
;; AVX packed float add/subtract in three-operand form: operand 0 is
;; written, operand 1 is a register and operand 2 a register or memory.
;; Covers both 16-byte and 32-byte float modes.
;; NOTE(review): <comm> is defined outside this part of the file;
;; presumably it supplies the "%" commutativity marker for plus and
;; nothing for minus -- confirm.
532 (define_insn "*avx_<plusminus_insn><mode>3"
533 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
534 (plusminus:AVXMODEF2P
535 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
536 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
537 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
538 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
539 "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
540 [(set_attr "type" "sseadd")
541 (set_attr "prefix" "vex")
542 (set_attr "mode" "<avxvecmode>")])
543
;; Add/subtract expander for the 16-byte float modes V4SF / V2DF.  The
;; operands are first passed through
;; ix86_fixup_binary_operands_no_copy; there is no DONE, so the
;; template is then emitted.
544 (define_expand "<plusminus_insn><mode>3"
545 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
546 (plusminus:SSEMODEF2P
547 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
548 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
549 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
550 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
551
;; SSE packed float add/subtract in two-operand form: operand 1 is tied
;; to the destination (constraint "0"), so only operands 2 and 0 appear
;; in the assembler template.
552 (define_insn "*<plusminus_insn><mode>3"
553 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
554 (plusminus:SSEMODEF2P
555 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
556 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
557 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
558 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
559 "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
560 [(set_attr "type" "sseadd")
561 (set_attr "mode" "<MODE>")])
562
;; AVX scalar add/subtract on 16-byte vectors.  The vec_merge mask 1
;; takes element 0 from the arithmetic result and every other element
;; from operand 1.  Three-operand form.
563 (define_insn "*avx_vm<plusminus_insn><mode>3"
564 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
565 (vec_merge:SSEMODEF2P
566 (plusminus:SSEMODEF2P
567 (match_operand:SSEMODEF2P 1 "register_operand" "x")
568 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
569 (match_dup 1)
570 (const_int 1)))]
571 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
572 "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
573 [(set_attr "type" "sseadd")
574 (set_attr "prefix" "vex")
575 (set_attr "mode" "<ssescalarmode>")])
576
;; SSE scalar add/subtract on 16-byte vectors.  The vec_merge mask 1
;; takes element 0 from the arithmetic result and every other element
;; from operand 1, which is tied to the destination (constraint "0").
577 (define_insn "<sse>_vm<plusminus_insn><mode>3"
578 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
579 (vec_merge:SSEMODEF2P
580 (plusminus:SSEMODEF2P
581 (match_operand:SSEMODEF2P 1 "register_operand" "0")
582 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
583 (match_dup 1)
584 (const_int 1)))]
585 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
586 "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
587 [(set_attr "type" "sseadd")
588 (set_attr "mode" "<ssescalarmode>")])
589
;; Multiply expander for the 32-byte float modes V8SF / V4DF.  The
;; operands are passed through ix86_fixup_binary_operands_no_copy;
;; there is no DONE, so the template is then emitted.
590 (define_expand "mul<mode>3"
591 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
592 (mult:AVX256MODEF2P
593 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
594 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
595 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
596 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
597
;; AVX packed float multiply (vmulps / vmulpd) in three-operand form
;; for both 16-byte and 32-byte float modes.  The "%" in operand 1's
;; constraint marks operands 1 and 2 as commutative.
598 (define_insn "*avx_mul<mode>3"
599 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
600 (mult:AVXMODEF2P
601 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
602 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
603 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
604 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
605 "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
606 [(set_attr "type" "ssemul")
607 (set_attr "prefix" "vex")
608 (set_attr "mode" "<avxvecmode>")])
609
;; Multiply expander for the 16-byte float modes V4SF / V2DF.  The
;; operands are passed through ix86_fixup_binary_operands_no_copy;
;; there is no DONE, so the template is then emitted.
610 (define_expand "mul<mode>3"
611 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
612 (mult:SSEMODEF2P
613 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
614 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
615 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
616 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
617
;; SSE packed float multiply (mulps / mulpd) in two-operand form:
;; operand 1 is tied to the destination and is commutative with
;; operand 2 (constraint "%0").
618 (define_insn "*mul<mode>3"
619 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
620 (mult:SSEMODEF2P
621 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
622 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
623 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
624 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
625 "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
626 [(set_attr "type" "ssemul")
627 (set_attr "mode" "<MODE>")])
628
;; AVX scalar multiply (vmulss / vmulsd) on 16-byte vectors.  The
;; vec_merge mask 1 takes element 0 from the product and every other
;; element from operand 1.  Three-operand form.
;; NOTE(review): guarded by AVX_VEC_FLOAT_MODE_P, while the sibling
;; *avx_vm<plusminus_insn><mode>3 and *avx_vmdiv<mode>3 patterns use
;; AVX128_VEC_FLOAT_MODE_P -- confirm the two predicates agree for the
;; SSEMODEF2P modes.
629 (define_insn "*avx_vmmul<mode>3"
630 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
631 (vec_merge:SSEMODEF2P
632 (mult:SSEMODEF2P
633 (match_operand:SSEMODEF2P 1 "register_operand" "x")
634 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
635 (match_dup 1)
636 (const_int 1)))]
637 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
638 "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
639 [(set_attr "type" "ssemul")
640 (set_attr "prefix" "vex")
641 (set_attr "mode" "<ssescalarmode>")])
642
;; SSE scalar multiply (mulss / mulsd) on 16-byte vectors.  The
;; vec_merge mask 1 takes element 0 from the product and every other
;; element from operand 1, which is tied to the destination.
643 (define_insn "<sse>_vmmul<mode>3"
644 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
645 (vec_merge:SSEMODEF2P
646 (mult:SSEMODEF2P
647 (match_operand:SSEMODEF2P 1 "register_operand" "0")
648 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
649 (match_dup 1)
650 (const_int 1)))]
651 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
652 "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
653 [(set_attr "type" "ssemul")
654 (set_attr "mode" "<ssescalarmode>")])
655
;; V8SF division expander.  The operands are first passed through
;; ix86_fixup_binary_operands_no_copy.  Then, when SSE math and
;; TARGET_RECIP are enabled, the insn is not being optimized for size,
;; and finite-math-only, non-trapping and unsafe math optimizations are
;; all in effect, the division is handed to ix86_emit_swdivsf and the
;; template is skipped (DONE).  Otherwise the plain div RTL is emitted.
;; Note the size test here is !optimize_insn_for_size_p (), whereas
;; divv4sf3 tests optimize_insn_for_speed_p ().
656 (define_expand "divv8sf3"
657 [(set (match_operand:V8SF 0 "register_operand" "")
658 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
659 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
660 "TARGET_AVX"
661 {
662 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
663
664 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
665 && flag_finite_math_only && !flag_trapping_math
666 && flag_unsafe_math_optimizations)
667 {
668 ix86_emit_swdivsf (operands[0], operands[1],
669 operands[2], V8SFmode);
670 DONE;
671 }
672 })
673
;; V4DF division expander.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy and the div RTL is then emitted;
;; unlike divv8sf3 there is no alternative path through
;; ix86_emit_swdivsf.
674 (define_expand "divv4df3"
675 [(set (match_operand:V4DF 0 "register_operand" "")
676 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
677 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
678 "TARGET_AVX"
679 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
680
;; AVX packed float divide (vdivps / vdivpd) in three-operand form,
;; iterated over both 16-byte and 32-byte float modes.  Operand 1 (the
;; dividend) must be a register; operand 2 may be memory.
681 (define_insn "avx_div<mode>3"
682 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
683 (div:AVXMODEF2P
684 (match_operand:AVXMODEF2P 1 "register_operand" "x")
685 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
686 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
687 "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
688 [(set_attr "type" "ssediv")
689 (set_attr "prefix" "vex")
690 (set_attr "mode" "<MODE>")])
691
;; V4SF division expander.  When SSE math and TARGET_RECIP are enabled,
;; the insn is being optimized for speed, and finite-math-only,
;; non-trapping and unsafe math optimizations are all in effect, the
;; division is handed to ix86_emit_swdivsf and the template is skipped
;; (DONE).  Otherwise the plain div RTL is emitted.  Unlike divv8sf3,
;; the operands are not passed through
;; ix86_fixup_binary_operands_no_copy.
692 (define_expand "divv4sf3"
693 [(set (match_operand:V4SF 0 "register_operand" "")
694 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
695 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
696 "TARGET_SSE"
697 {
698 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
699 && flag_finite_math_only && !flag_trapping_math
700 && flag_unsafe_math_optimizations)
701 {
702 ix86_emit_swdivsf (operands[0], operands[1],
703 operands[2], V4SFmode);
704 DONE;
705 }
706 })
707
;; V2DF division expander (TARGET_SSE2).  The preparation statement is
;; empty, so the div RTL is emitted exactly as written.
708 (define_expand "divv2df3"
709 [(set (match_operand:V2DF 0 "register_operand" "")
710 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
711 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
712 "TARGET_SSE2"
713 "")
714
;; Unnamed AVX packed float divide restricted to the 16-byte modes
;; V4SF / V2DF (vdivps / vdivpd, three-operand form).
;; NOTE(review): the named avx_div<mode>3 pattern earlier in the file
;; already iterates over V4SF and V2DF with the same RTL shape --
;; confirm whether this pattern can ever be the one selected.
715 (define_insn "*avx_div<mode>3"
716 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
717 (div:SSEMODEF2P
718 (match_operand:SSEMODEF2P 1 "register_operand" "x")
719 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
720 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
721 "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
722 [(set_attr "type" "ssediv")
723 (set_attr "prefix" "vex")
724 (set_attr "mode" "<MODE>")])
725
;; SSE packed float divide (divps / divpd) in two-operand form: the
;; dividend, operand 1, is tied to the destination (constraint "0").
726 (define_insn "<sse>_div<mode>3"
727 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
728 (div:SSEMODEF2P
729 (match_operand:SSEMODEF2P 1 "register_operand" "0")
730 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
731 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
732 "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
733 [(set_attr "type" "ssediv")
734 (set_attr "mode" "<MODE>")])
735
;; AVX scalar divide (vdivss / vdivsd) on 16-byte vectors.  The
;; vec_merge mask 1 takes element 0 from the quotient and every other
;; element from operand 1.  Three-operand form.
736 (define_insn "*avx_vmdiv<mode>3"
737 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
738 (vec_merge:SSEMODEF2P
739 (div:SSEMODEF2P
740 (match_operand:SSEMODEF2P 1 "register_operand" "x")
741 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
742 (match_dup 1)
743 (const_int 1)))]
744 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
745 "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
746 [(set_attr "type" "ssediv")
747 (set_attr "prefix" "vex")
748 (set_attr "mode" "<ssescalarmode>")])
749
;; SSE scalar divide (divss / divsd) on 16-byte vectors.  The vec_merge
;; mask 1 takes element 0 from the quotient and every other element
;; from operand 1, which is tied to the destination.
750 (define_insn "<sse>_vmdiv<mode>3"
751 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
752 (vec_merge:SSEMODEF2P
753 (div:SSEMODEF2P
754 (match_operand:SSEMODEF2P 1 "register_operand" "0")
755 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
756 (match_dup 1)
757 (const_int 1)))]
758 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
759 "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
760 [(set_attr "type" "ssediv")
761 (set_attr "mode" "<ssescalarmode>")])
762
;; AVX vrcpps on V8SF, modelled as UNSPEC_RCP.  The source may be a
;; register or memory.
763 (define_insn "avx_rcpv8sf2"
764 [(set (match_operand:V8SF 0 "register_operand" "=x")
765 (unspec:V8SF
766 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
767 "TARGET_AVX"
768 "vrcpps\t{%1, %0|%0, %1}"
769 [(set_attr "type" "sse")
770 (set_attr "prefix" "vex")
771 (set_attr "mode" "V8SF")])
772
;; rcpps on V4SF, modelled as UNSPEC_RCP.  The template carries the
;; "%v" prefix and the insn is marked prefix "maybe_vex".
773 (define_insn "sse_rcpv4sf2"
774 [(set (match_operand:V4SF 0 "register_operand" "=x")
775 (unspec:V4SF
776 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
777 "TARGET_SSE"
778 "%vrcpps\t{%1, %0|%0, %1}"
779 [(set_attr "type" "sse")
780 (set_attr "atom_sse_attr" "rcp")
781 (set_attr "prefix" "maybe_vex")
782 (set_attr "mode" "V4SF")])
783
;; AVX vrcpss.  The vec_merge mask 1 takes element 0 from the
;; UNSPEC_RCP of operand 1 and every other element from operand 2.
;; In the assembler template operand 1 is therefore the last source
;; (AT&T order: %1, %2, %0).
784 (define_insn "*avx_vmrcpv4sf2"
785 [(set (match_operand:V4SF 0 "register_operand" "=x")
786 (vec_merge:V4SF
787 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
788 UNSPEC_RCP)
789 (match_operand:V4SF 2 "register_operand" "x")
790 (const_int 1)))]
791 "TARGET_AVX"
792 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
793 [(set_attr "type" "sse")
794 (set_attr "prefix" "vex")
795 (set_attr "mode" "SF")])
796
;; SSE rcpss.  The vec_merge mask 1 takes element 0 from the UNSPEC_RCP
;; of operand 1 and every other element from operand 2, which is tied
;; to the destination (constraint "0") and so does not appear in the
;; assembler template.
797 (define_insn "sse_vmrcpv4sf2"
798 [(set (match_operand:V4SF 0 "register_operand" "=x")
799 (vec_merge:V4SF
800 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
801 UNSPEC_RCP)
802 (match_operand:V4SF 2 "register_operand" "0")
803 (const_int 1)))]
804 "TARGET_SSE"
805 "rcpss\t{%1, %0|%0, %1}"
806 [(set_attr "type" "sse")
807 (set_attr "atom_sse_attr" "rcp")
808 (set_attr "mode" "SF")])
809
;; V8SF square-root expander.  When SSE math and TARGET_RECIP are
;; enabled, the insn is not being optimized for size, and
;; finite-math-only, non-trapping and unsafe math optimizations are all
;; in effect, the operation is handed to ix86_emit_swsqrtsf (last
;; argument 0) and the template is skipped (DONE).  Otherwise the sqrt
;; RTL is emitted.
810 (define_expand "sqrtv8sf2"
811 [(set (match_operand:V8SF 0 "register_operand" "")
812 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
813 "TARGET_AVX"
814 {
815 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
816 && flag_finite_math_only && !flag_trapping_math
817 && flag_unsafe_math_optimizations)
818 {
819 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
820 DONE;
821 }
822 })
823
;; AVX vsqrtps on V8SF; the source may be a register or memory.
824 (define_insn "avx_sqrtv8sf2"
825 [(set (match_operand:V8SF 0 "register_operand" "=x")
826 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
827 "TARGET_AVX"
828 "vsqrtps\t{%1, %0|%0, %1}"
829 [(set_attr "type" "sse")
830 (set_attr "prefix" "vex")
831 (set_attr "mode" "V8SF")])
832
;; V4SF square-root expander.  When SSE math and TARGET_RECIP are
;; enabled, the insn is being optimized for speed, and
;; finite-math-only, non-trapping and unsafe math optimizations are all
;; in effect, the operation is handed to ix86_emit_swsqrtsf (last
;; argument 0) and the template is skipped (DONE).  Otherwise the sqrt
;; RTL is emitted.
833 (define_expand "sqrtv4sf2"
834 [(set (match_operand:V4SF 0 "register_operand" "")
835 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
836 "TARGET_SSE"
837 {
838 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
839 && flag_finite_math_only && !flag_trapping_math
840 && flag_unsafe_math_optimizations)
841 {
842 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
843 DONE;
844 }
845 })
846
;; sqrtps on V4SF.  The template carries the "%v" prefix and the insn
;; is marked prefix "maybe_vex".
847 (define_insn "sse_sqrtv4sf2"
848 [(set (match_operand:V4SF 0 "register_operand" "=x")
849 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
850 "TARGET_SSE"
851 "%vsqrtps\t{%1, %0|%0, %1}"
852 [(set_attr "type" "sse")
853 (set_attr "atom_sse_attr" "sqrt")
854 (set_attr "prefix" "maybe_vex")
855 (set_attr "mode" "V4SF")])
856
;; AVX vsqrtpd on V4DF.  This is a directly named insn; unlike the V8SF
;; case there is no separate expander in front of it.
857 (define_insn "sqrtv4df2"
858 [(set (match_operand:V4DF 0 "register_operand" "=x")
859 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
860 "TARGET_AVX"
861 "vsqrtpd\t{%1, %0|%0, %1}"
862 [(set_attr "type" "sse")
863 (set_attr "prefix" "vex")
864 (set_attr "mode" "V4DF")])
865
;; sqrtpd on V2DF (TARGET_SSE2).  Directly named insn; the template
;; carries the "%v" prefix and the insn is marked prefix "maybe_vex".
866 (define_insn "sqrtv2df2"
867 [(set (match_operand:V2DF 0 "register_operand" "=x")
868 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
869 "TARGET_SSE2"
870 "%vsqrtpd\t{%1, %0|%0, %1}"
871 [(set_attr "type" "sse")
872 (set_attr "prefix" "maybe_vex")
873 (set_attr "mode" "V2DF")])
874
;; AVX scalar square root (vsqrtss / vsqrtsd) on 16-byte vectors.  The
;; vec_merge mask 1 takes element 0 from sqrt of operand 1 and every
;; other element from operand 2.
;; NOTE(review): guarded by AVX_VEC_FLOAT_MODE_P, while the sibling
;; *avx_vmdiv<mode>3 pattern uses AVX128_VEC_FLOAT_MODE_P -- confirm
;; the two predicates agree for the SSEMODEF2P modes.
875 (define_insn "*avx_vmsqrt<mode>2"
876 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
877 (vec_merge:SSEMODEF2P
878 (sqrt:SSEMODEF2P
879 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
880 (match_operand:SSEMODEF2P 2 "register_operand" "x")
881 (const_int 1)))]
882 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
883 "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
884 [(set_attr "type" "sse")
885 (set_attr "prefix" "vex")
886 (set_attr "mode" "<ssescalarmode>")])
887
;; SSE scalar square root.  Same vec_merge shape as the AVX variant, but
;; operand 2 is tied to the destination through the "0" constraint, so the
;; two-operand sqrts<suffix> form only names operands 1 and 0.
888 (define_insn "<sse>_vmsqrt<mode>2"
889 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
890 (vec_merge:SSEMODEF2P
891 (sqrt:SSEMODEF2P
892 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
893 (match_operand:SSEMODEF2P 2 "register_operand" "0")
894 (const_int 1)))]
895 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
896 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
897 [(set_attr "type" "sse")
898 (set_attr "atom_sse_attr" "sqrt")
899 (set_attr "mode" "<ssescalarmode>")])
900
;; Expander for the V8SF UNSPEC_RSQRT operation, enabled when both
;; TARGET_AVX and TARGET_SSE_MATH hold.  The C body always calls
;; ix86_emit_swsqrtsf (last argument 1) and ends with DONE, so the template
;; itself is never emitted by this expander.
901 (define_expand "rsqrtv8sf2"
902 [(set (match_operand:V8SF 0 "register_operand" "")
903 (unspec:V8SF
904 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
905 "TARGET_AVX && TARGET_SSE_MATH"
906 {
907 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
908 DONE;
909 })
910
;; V8SF UNSPEC_RSQRT of operand 1 (register or memory) into register
;; operand 0, under TARGET_AVX; output as vrsqrtps with prefix "vex".
911 (define_insn "avx_rsqrtv8sf2"
912 [(set (match_operand:V8SF 0 "register_operand" "=x")
913 (unspec:V8SF
914 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
915 "TARGET_AVX"
916 "vrsqrtps\t{%1, %0|%0, %1}"
917 [(set_attr "type" "sse")
918 (set_attr "prefix" "vex")
919 (set_attr "mode" "V8SF")])
920
;; Expander for the V4SF UNSPEC_RSQRT operation, enabled under
;; TARGET_SSE_MATH.  The C body always calls ix86_emit_swsqrtsf (last
;; argument 1) and ends with DONE, so the template itself is never emitted
;; by this expander.
921 (define_expand "rsqrtv4sf2"
922 [(set (match_operand:V4SF 0 "register_operand" "")
923 (unspec:V4SF
924 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
925 "TARGET_SSE_MATH"
926 {
927 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
928 DONE;
929 })
930
;; V4SF UNSPEC_RSQRT of operand 1 (register or memory) into register
;; operand 0, under TARGET_SSE.  Output template is "%vrsqrtps" and the
;; prefix attribute is maybe_vex.
931 (define_insn "sse_rsqrtv4sf2"
932 [(set (match_operand:V4SF 0 "register_operand" "=x")
933 (unspec:V4SF
934 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
935 "TARGET_SSE"
936 "%vrsqrtps\t{%1, %0|%0, %1}"
937 [(set_attr "type" "sse")
938 (set_attr "prefix" "maybe_vex")
939 (set_attr "mode" "V4SF")])
940
;; AVX scalar UNSPEC_RSQRT.  The vec_merge mask (const_int 1) takes
;; element 0 from the unspec of operand 1 and the remaining elements from
;; operand 2, which is a separate source register in the three-operand
;; vrsqrtss form.
941 (define_insn "*avx_vmrsqrtv4sf2"
942 [(set (match_operand:V4SF 0 "register_operand" "=x")
943 (vec_merge:V4SF
944 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
945 UNSPEC_RSQRT)
946 (match_operand:V4SF 2 "register_operand" "x")
947 (const_int 1)))]
948 "TARGET_AVX"
949 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
950 [(set_attr "type" "sse")
951 (set_attr "prefix" "vex")
952 (set_attr "mode" "SF")])
953
;; SSE scalar UNSPEC_RSQRT.  Same vec_merge shape as the AVX variant, but
;; operand 2 is tied to the destination through the "0" constraint, so the
;; two-operand rsqrtss form only names operands 1 and 0.
954 (define_insn "sse_vmrsqrtv4sf2"
955 [(set (match_operand:V4SF 0 "register_operand" "=x")
956 (vec_merge:V4SF
957 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
958 UNSPEC_RSQRT)
959 (match_operand:V4SF 2 "register_operand" "0")
960 (const_int 1)))]
961 "TARGET_SSE"
962 "rsqrtss\t{%1, %0|%0, %1}"
963 [(set_attr "type" "sse")
964 (set_attr "mode" "SF")])
965
966 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
967 ;; isn't really correct, as those rtl operators aren't defined when
968 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
969
;; Expander for smax/smin on the AVX256MODEF2P modes.  When
;; flag_finite_math_only is off, operand 1 is first forced into a register;
;; then ix86_fixup_binary_operands_no_copy is applied to the operands.
970 (define_expand "<code><mode>3"
971 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
972 (smaxmin:AVX256MODEF2P
973 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
974 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
975 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
976 {
977 if (!flag_finite_math_only)
978 operands[1] = force_reg (<MODE>mode, operands[1]);
979 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
980 })
981
;; Expander for smax/smin on the SSEMODEF2P modes.  When
;; flag_finite_math_only is off, operand 1 is first forced into a register;
;; then ix86_fixup_binary_operands_no_copy is applied to the operands.
982 (define_expand "<code><mode>3"
983 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
984 (smaxmin:SSEMODEF2P
985 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
986 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
987 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
988 {
989 if (!flag_finite_math_only)
990 operands[1] = force_reg (<MODE>mode, operands[1]);
991 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
992 })
993
;; AVX vector min/max, matched only when flag_finite_math_only is set and
;; ix86_binary_operator_ok accepts the operands.  Operand 1 carries the "%"
;; commutative marker, so operands 1 and 2 may be exchanged.
994 (define_insn "*avx_<code><mode>3_finite"
995 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
996 (smaxmin:AVXMODEF2P
997 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
998 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
999 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1000 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1001 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1002 [(set_attr "type" "sseadd")
1003 (set_attr "prefix" "vex")
1004 (set_attr "mode" "<MODE>")])
1005
;; SSE vector min/max, matched only when flag_finite_math_only is set and
;; ix86_binary_operator_ok accepts the operands.  Operand 1 is tied to the
;; destination ("0") and carries the "%" commutative marker.
1006 (define_insn "*<code><mode>3_finite"
1007 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1008 (smaxmin:SSEMODEF2P
1009 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1010 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1011 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1012 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1013 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1014 [(set_attr "type" "sseadd")
1015 (set_attr "mode" "<MODE>")])
1016
;; AVX vector min/max with no flag_finite_math_only requirement.
;; Operand 1 is a plain register with NO "%" commutative marker: without
;; finite-math-only the result depends on operand order (NaN and -0.0), so
;; the operands must not be exchanged.  This mirrors the SSE pattern
;; "*<code><mode>3", and the expanders already force operand 1 into a
;; register when !flag_finite_math_only.
1017 (define_insn "*avx_<code><mode>3"
1018 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1019 (smaxmin:AVXMODEF2P
1020 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1021 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1022 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1023 "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1024 [(set_attr "type" "sseadd")
1025 (set_attr "prefix" "vex")
1026 (set_attr "mode" "<avxvecmode>")])
1027
;; SSE vector min/max with no flag_finite_math_only requirement.
;; Operand 1 must be a register tied to the destination ("0") and has no
;; "%" marker, so the operand order is kept as written.
1028 (define_insn "*<code><mode>3"
1029 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1030 (smaxmin:SSEMODEF2P
1031 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1032 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1033 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1034 "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1035 [(set_attr "type" "sseadd")
1036 (set_attr "mode" "<MODE>")])
1037
;; AVX scalar min/max.  The vec_merge mask (const_int 1) takes element 0
;; from smaxmin (operand 1, operand 2) and the remaining elements from
;; operand 1 (match_dup 1).
;; NOTE(review): "type" is "sse" here while "<sse>_vm<code><mode>3" uses
;; "sseadd" -- confirm whether the difference is intentional.
1038 (define_insn "*avx_vm<code><mode>3"
1039 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1040 (vec_merge:SSEMODEF2P
1041 (smaxmin:SSEMODEF2P
1042 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1043 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1044 (match_dup 1)
1045 (const_int 1)))]
1046 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1047 "v<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1048 [(set_attr "type" "sse")
1049 (set_attr "prefix" "vex")
1050 (set_attr "mode" "<ssescalarmode>")])
1051
;; SSE scalar min/max.  The vec_merge mask (const_int 1) takes element 0
;; from smaxmin (operand 1, operand 2) and the remaining elements from
;; operand 1, which is tied to the destination through "0".
1052 (define_insn "<sse>_vm<code><mode>3"
1053 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1054 (vec_merge:SSEMODEF2P
1055 (smaxmin:SSEMODEF2P
1056 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1057 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1058 (match_dup 1)
1059 (const_int 1)))]
1060 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1061 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1062 [(set_attr "type" "sseadd")
1063 (set_attr "mode" "<ssescalarmode>")])
1064
1065 ;; These versions of the min/max patterns implement exactly the operations
1066 ;; min = (op1 < op2 ? op1 : op2)
1067 ;; max = (!(op1 < op2) ? op1 : op2)
1068 ;; Their operands are not commutative, and thus they may be used in the
1069 ;; presence of -0.0 and NaN.
1070
;; AVX form of the order-sensitive minimum, represented as UNSPEC_IEEE_MIN
;; over (operand 1, operand 2).  Operand 1 is a register with no
;; commutative marker; output is vminp<suffix> with operands 2, 1, 0.
1071 (define_insn "*avx_ieee_smin<mode>3"
1072 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1073 (unspec:AVXMODEF2P
1074 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1075 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1076 UNSPEC_IEEE_MIN))]
1077 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1078 "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1079 [(set_attr "type" "sseadd")
1080 (set_attr "prefix" "vex")
1081 (set_attr "mode" "<avxvecmode>")])
1082
;; AVX form of the order-sensitive maximum, represented as UNSPEC_IEEE_MAX
;; over (operand 1, operand 2).  Operand 1 is a register with no
;; commutative marker; output is vmaxp<suffix> with operands 2, 1, 0.
1083 (define_insn "*avx_ieee_smax<mode>3"
1084 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1085 (unspec:AVXMODEF2P
1086 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1087 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1088 UNSPEC_IEEE_MAX))]
1089 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1090 "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1091 [(set_attr "type" "sseadd")
1092 (set_attr "prefix" "vex")
1093 (set_attr "mode" "<avxvecmode>")])
1094
;; SSE form of the order-sensitive minimum (UNSPEC_IEEE_MIN).  Operand 1 is
;; tied to the destination ("0"); output is the two-operand minp<suffix>.
1095 (define_insn "*ieee_smin<mode>3"
1096 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1097 (unspec:SSEMODEF2P
1098 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1099 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1100 UNSPEC_IEEE_MIN))]
1101 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1102 "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1103 [(set_attr "type" "sseadd")
1104 (set_attr "mode" "<MODE>")])
1105
;; SSE form of the order-sensitive maximum (UNSPEC_IEEE_MAX).  Operand 1 is
;; tied to the destination ("0"); output is the two-operand maxp<suffix>.
1106 (define_insn "*ieee_smax<mode>3"
1107 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1108 (unspec:SSEMODEF2P
1109 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1110 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1111 UNSPEC_IEEE_MAX))]
1112 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1113 "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1114 [(set_attr "type" "sseadd")
1115 (set_attr "mode" "<MODE>")])
1116
;; V8SF add/subtract under TARGET_AVX.  The vec_merge mask 170 (binary
;; 10101010) takes the odd-numbered elements from operand 1 + operand 2 and
;; the even-numbered elements from operand 1 - operand 2.
1117 (define_insn "avx_addsubv8sf3"
1118 [(set (match_operand:V8SF 0 "register_operand" "=x")
1119 (vec_merge:V8SF
1120 (plus:V8SF
1121 (match_operand:V8SF 1 "register_operand" "x")
1122 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1123 (minus:V8SF (match_dup 1) (match_dup 2))
1124 (const_int 170)))]
1125 "TARGET_AVX"
1126 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1127 [(set_attr "type" "sseadd")
1128 (set_attr "prefix" "vex")
1129 (set_attr "mode" "V8SF")])
1130
;; V4DF add/subtract under TARGET_AVX.  The vec_merge mask 10 (binary 1010)
;; takes elements 1 and 3 from operand 1 + operand 2 and elements 0 and 2
;; from operand 1 - operand 2.
1131 (define_insn "avx_addsubv4df3"
1132 [(set (match_operand:V4DF 0 "register_operand" "=x")
1133 (vec_merge:V4DF
1134 (plus:V4DF
1135 (match_operand:V4DF 1 "register_operand" "x")
1136 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1137 (minus:V4DF (match_dup 1) (match_dup 2))
1138 (const_int 10)))]
1139 "TARGET_AVX"
1140 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1141 [(set_attr "type" "sseadd")
1142 (set_attr "prefix" "vex")
1143 (set_attr "mode" "V4DF")])
1144
;; V4SF add/subtract, AVX three-operand form.  The vec_merge mask 10
;; (binary 1010) takes elements 1 and 3 from the sum and elements 0 and 2
;; from the difference.
1145 (define_insn "*avx_addsubv4sf3"
1146 [(set (match_operand:V4SF 0 "register_operand" "=x")
1147 (vec_merge:V4SF
1148 (plus:V4SF
1149 (match_operand:V4SF 1 "register_operand" "x")
1150 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1151 (minus:V4SF (match_dup 1) (match_dup 2))
1152 (const_int 10)))]
1153 "TARGET_AVX"
1154 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1155 [(set_attr "type" "sseadd")
1156 (set_attr "prefix" "vex")
1157 (set_attr "mode" "V4SF")])
1158
;; V4SF add/subtract, SSE3 two-operand form (operand 1 tied to the
;; destination through "0").  The vec_merge mask 10 (binary 1010) takes
;; elements 1 and 3 from the sum and elements 0 and 2 from the difference.
1159 (define_insn "sse3_addsubv4sf3"
1160 [(set (match_operand:V4SF 0 "register_operand" "=x")
1161 (vec_merge:V4SF
1162 (plus:V4SF
1163 (match_operand:V4SF 1 "register_operand" "0")
1164 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1165 (minus:V4SF (match_dup 1) (match_dup 2))
1166 (const_int 10)))]
1167 "TARGET_SSE3"
1168 "addsubps\t{%2, %0|%0, %2}"
1169 [(set_attr "type" "sseadd")
1170 (set_attr "prefix_rep" "1")
1171 (set_attr "mode" "V4SF")])
1172
;; V2DF add/subtract, AVX three-operand form.  The vec_merge mask 2 takes
;; element 1 from the sum and element 0 from the difference.
1173 (define_insn "*avx_addsubv2df3"
1174 [(set (match_operand:V2DF 0 "register_operand" "=x")
1175 (vec_merge:V2DF
1176 (plus:V2DF
1177 (match_operand:V2DF 1 "register_operand" "x")
1178 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1179 (minus:V2DF (match_dup 1) (match_dup 2))
1180 (const_int 2)))]
1181 "TARGET_AVX"
1182 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1183 [(set_attr "type" "sseadd")
1184 (set_attr "prefix" "vex")
1185 (set_attr "mode" "V2DF")])
1186
;; V2DF add/subtract, SSE3 two-operand form (operand 1 tied to the
;; destination through "0").  The vec_merge mask 2 takes element 1 from
;; the sum and element 0 from the difference.
1187 (define_insn "sse3_addsubv2df3"
1188 [(set (match_operand:V2DF 0 "register_operand" "=x")
1189 (vec_merge:V2DF
1190 (plus:V2DF
1191 (match_operand:V2DF 1 "register_operand" "0")
1192 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1193 (minus:V2DF (match_dup 1) (match_dup 2))
1194 (const_int 2)))]
1195 "TARGET_SSE3"
1196 "addsubpd\t{%2, %0|%0, %2}"
1197 [(set_attr "type" "sseadd")
1198 (set_attr "atom_unit" "complex")
1199 (set_attr "mode" "V2DF")])
1200
;; V4DF horizontal add/subtract under TARGET_AVX.
;; The 256-bit instruction works independently on each 128-bit lane, so the
;; result elements are, in order:
;;   [0] op1[0] <op> op1[1]    [1] op2[0] <op> op2[1]    (low lane)
;;   [2] op1[2] <op> op1[3]    [3] op2[2] <op> op2[3]    (high lane)
;; The RTL below spells out exactly that layout; the operand 1 and operand 2
;; terms alternate within each V2DF half.
1201 (define_insn "avx_h<plusminus_insn>v4df3"
1202 [(set (match_operand:V4DF 0 "register_operand" "=x")
1203 (vec_concat:V4DF
1204 (vec_concat:V2DF
1205 (plusminus:DF
1206 (vec_select:DF
1207 (match_operand:V4DF 1 "register_operand" "x")
1208 (parallel [(const_int 0)]))
1209 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1210 (plusminus:DF
1211 (vec_select:DF
1212 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1213 (parallel [(const_int 0)]))
1214 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1215 (vec_concat:V2DF
1216 (plusminus:DF
1217 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1218 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1219 (plusminus:DF
1220 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1221 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1222 "TARGET_AVX"
1223 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1224 [(set_attr "type" "sseadd")
1225 (set_attr "prefix" "vex")
1226 (set_attr "mode" "V4DF")])
1227
;; V8SF horizontal add/subtract under TARGET_AVX.  The result is built as
;; two V4SF halves.  The low half holds the pairwise results of operand 1
;; elements 0-3 followed by those of operand 2 elements 0-3; the high half
;; does the same for elements 4-7 of each operand.
1228 (define_insn "avx_h<plusminus_insn>v8sf3"
1229 [(set (match_operand:V8SF 0 "register_operand" "=x")
1230 (vec_concat:V8SF
1231 (vec_concat:V4SF
1232 (vec_concat:V2SF
1233 (plusminus:SF
1234 (vec_select:SF
1235 (match_operand:V8SF 1 "register_operand" "x")
1236 (parallel [(const_int 0)]))
1237 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1238 (plusminus:SF
1239 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1240 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1241 (vec_concat:V2SF
1242 (plusminus:SF
1243 (vec_select:SF
1244 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1245 (parallel [(const_int 0)]))
1246 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1247 (plusminus:SF
1248 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1249 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1250 (vec_concat:V4SF
1251 (vec_concat:V2SF
1252 (plusminus:SF
1253 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1254 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1255 (plusminus:SF
1256 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1257 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1258 (vec_concat:V2SF
1259 (plusminus:SF
1260 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1261 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1262 (plusminus:SF
1263 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1264 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1265 "TARGET_AVX"
1266 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1267 [(set_attr "type" "sseadd")
1268 (set_attr "prefix" "vex")
1269 (set_attr "mode" "V8SF")])
1270
;; V4SF horizontal add/subtract, AVX three-operand form.  Result elements
;; 0-1 are the pairwise results of operand 1 (elements 0/1 and 2/3);
;; result elements 2-3 are the pairwise results of operand 2.
1271 (define_insn "*avx_h<plusminus_insn>v4sf3"
1272 [(set (match_operand:V4SF 0 "register_operand" "=x")
1273 (vec_concat:V4SF
1274 (vec_concat:V2SF
1275 (plusminus:SF
1276 (vec_select:SF
1277 (match_operand:V4SF 1 "register_operand" "x")
1278 (parallel [(const_int 0)]))
1279 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1280 (plusminus:SF
1281 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1282 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1283 (vec_concat:V2SF
1284 (plusminus:SF
1285 (vec_select:SF
1286 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1287 (parallel [(const_int 0)]))
1288 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1289 (plusminus:SF
1290 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1291 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1292 "TARGET_AVX"
1293 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1294 [(set_attr "type" "sseadd")
1295 (set_attr "prefix" "vex")
1296 (set_attr "mode" "V4SF")])
1297
;; V4SF horizontal add/subtract, SSE3 two-operand form (operand 1 tied to
;; the destination through "0").  Result elements 0-1 are the pairwise
;; results of operand 1; result elements 2-3 are those of operand 2.
1298 (define_insn "sse3_h<plusminus_insn>v4sf3"
1299 [(set (match_operand:V4SF 0 "register_operand" "=x")
1300 (vec_concat:V4SF
1301 (vec_concat:V2SF
1302 (plusminus:SF
1303 (vec_select:SF
1304 (match_operand:V4SF 1 "register_operand" "0")
1305 (parallel [(const_int 0)]))
1306 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1307 (plusminus:SF
1308 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1309 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1310 (vec_concat:V2SF
1311 (plusminus:SF
1312 (vec_select:SF
1313 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1314 (parallel [(const_int 0)]))
1315 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1316 (plusminus:SF
1317 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1318 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1319 "TARGET_SSE3"
1320 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1321 [(set_attr "type" "sseadd")
1322 (set_attr "atom_unit" "complex")
1323 (set_attr "prefix_rep" "1")
1324 (set_attr "mode" "V4SF")])
1325
;; V2DF horizontal add/subtract, AVX three-operand form.  Result element 0
;; combines elements 0 and 1 of operand 1; result element 1 combines
;; elements 0 and 1 of operand 2.
1326 (define_insn "*avx_h<plusminus_insn>v2df3"
1327 [(set (match_operand:V2DF 0 "register_operand" "=x")
1328 (vec_concat:V2DF
1329 (plusminus:DF
1330 (vec_select:DF
1331 (match_operand:V2DF 1 "register_operand" "x")
1332 (parallel [(const_int 0)]))
1333 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1334 (plusminus:DF
1335 (vec_select:DF
1336 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1337 (parallel [(const_int 0)]))
1338 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1339 "TARGET_AVX"
1340 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1341 [(set_attr "type" "sseadd")
1342 (set_attr "prefix" "vex")
1343 (set_attr "mode" "V2DF")])
1344
;; V2DF horizontal add/subtract, SSE3 two-operand form (operand 1 tied to
;; the destination through "0").  Result element 0 combines elements 0 and
;; 1 of operand 1; result element 1 combines elements 0 and 1 of operand 2.
1345 (define_insn "sse3_h<plusminus_insn>v2df3"
1346 [(set (match_operand:V2DF 0 "register_operand" "=x")
1347 (vec_concat:V2DF
1348 (plusminus:DF
1349 (vec_select:DF
1350 (match_operand:V2DF 1 "register_operand" "0")
1351 (parallel [(const_int 0)]))
1352 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1353 (plusminus:DF
1354 (vec_select:DF
1355 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1356 (parallel [(const_int 0)]))
1357 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1358 "TARGET_SSE3"
1359 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1360 [(set_attr "type" "sseadd")
1361 (set_attr "mode" "V2DF")])
1362
;; Expander for the V4SF sum reduction, available under TARGET_SSE.
;; With TARGET_SSE3 it emits gen_sse3_haddv4sf3 twice: first on operand 1
;; with itself into a fresh temporary, then on the temporary with itself
;; into operand 0.  Otherwise it delegates to ix86_expand_reduc_v4sf with
;; gen_addv4sf3.
1363 (define_expand "reduc_splus_v4sf"
1364 [(match_operand:V4SF 0 "register_operand" "")
1365 (match_operand:V4SF 1 "register_operand" "")]
1366 "TARGET_SSE"
1367 {
1368 if (TARGET_SSE3)
1369 {
1370 rtx tmp = gen_reg_rtx (V4SFmode);
1371 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1372 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1373 }
1374 else
1375 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
1376 DONE;
1377 })
1378
;; Expander for the V2DF sum reduction, available under TARGET_SSE3.  It
;; emits a single gen_sse3_haddv2df3 of operand 1 with itself into
;; operand 0.
1379 (define_expand "reduc_splus_v2df"
1380 [(match_operand:V2DF 0 "register_operand" "")
1381 (match_operand:V2DF 1 "register_operand" "")]
1382 "TARGET_SSE3"
1383 {
1384 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1385 DONE;
1386 })
1387
;; Expander for the V4SF maximum reduction, available under TARGET_SSE.
;; Delegates to ix86_expand_reduc_v4sf with gen_smaxv4sf3.
1388 (define_expand "reduc_smax_v4sf"
1389 [(match_operand:V4SF 0 "register_operand" "")
1390 (match_operand:V4SF 1 "register_operand" "")]
1391 "TARGET_SSE"
1392 {
1393 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
1394 DONE;
1395 })
1396
;; Expander for the V4SF minimum reduction, available under TARGET_SSE.
;; Delegates to ix86_expand_reduc_v4sf with gen_sminv4sf3.
1397 (define_expand "reduc_smin_v4sf"
1398 [(match_operand:V4SF 0 "register_operand" "")
1399 (match_operand:V4SF 1 "register_operand" "")]
1400 "TARGET_SSE"
1401 {
1402 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1403 DONE;
1404 })
1405
1406 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1407 ;;
1408 ;; Parallel floating point comparisons
1409 ;;
1410 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1411
;; AVX packed compare, represented as UNSPEC_PCMP over operands 1 and 2
;; plus an immediate operand 3 that must satisfy const_0_to_31_operand.
;; The immediate is printed as the first AT&T operand of vcmpp<suffix>.
1412 (define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
1413 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1414 (unspec:AVXMODEF2P
1415 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1416 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1417 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1418 UNSPEC_PCMP))]
1419 "TARGET_AVX"
1420 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1421 [(set_attr "type" "ssecmp")
1422 (set_attr "length_immediate" "1")
1423 (set_attr "prefix" "vex")
1424 (set_attr "mode" "<MODE>")])
1425
;; AVX scalar compare.  The vec_merge mask (const_int 1) takes element 0
;; from the UNSPEC_PCMP result and the remaining elements from operand 1.
;; Operand 3 is the comparison immediate (const_0_to_31_operand).
;; Operand 0 carries the "=x" output constraint: a define_insn output needs
;; "=" to be treated as written and "x" to be allocated to an SSE register.
1426 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
1427 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1428 (vec_merge:SSEMODEF2P
1429 (unspec:SSEMODEF2P
1430 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1431 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1432 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1433 UNSPEC_PCMP)
1434 (match_dup 1)
1435 (const_int 1)))]
1436 "TARGET_AVX"
1437 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1438 [(set_attr "type" "ssecmp")
1439 (set_attr "length_immediate" "1")
1440 (set_attr "prefix" "vex")
1441 (set_attr "mode" "<ssescalarmode>")])
1442
1443 ;; We don't promote 128bit vector compare intrinsics. But vectorizer
1444 ;; may generate 256bit vector compare instructions.
;; AVX mask-producing compare.  Operand 3 is the comparison operator
;; itself, accepted by the avx_comparison_float_operator predicate; the
;; template prints it with the %D3 modifier inside the vcmp...p<suffix>
;; mnemonic.
1445 (define_insn "*avx_maskcmp<mode>3"
1446 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1447 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1448 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1449 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1450 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1451 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1452 [(set_attr "type" "ssecmp")
1453 (set_attr "prefix" "vex")
1454 (set_attr "length_immediate" "1")
1455 (set_attr "mode" "<avxvecmode>")])
1456
;; SSE mask-producing compare over the SSEMODEF4 modes, disabled when
;; TARGET_XOP is set.  Operand 3 is the comparison operator
;; (sse_comparison_operator), printed with %D3; operand 1 is tied to the
;; destination through "0".
1457 (define_insn "<sse>_maskcmp<mode>3"
1458 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1459 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1460 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1461 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1462 "!TARGET_XOP
1463 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1464 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
1465 [(set_attr "type" "ssecmp")
1466 (set_attr "length_immediate" "1")
1467 (set_attr "mode" "<MODE>")])
1468
;; SSE scalar mask-producing compare.  The vec_merge mask (const_int 1)
;; takes element 0 from the comparison (operator operand 3) and the
;; remaining elements from operand 1, which is tied to the destination.
1469 (define_insn "<sse>_vmmaskcmp<mode>3"
1470 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1471 (vec_merge:SSEMODEF2P
1472 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1473 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1474 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1475 (match_dup 1)
1476 (const_int 1)))]
1477 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1478 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1479 [(set_attr "type" "ssecmp")
1480 (set_attr "length_immediate" "1")
1481 (set_attr "mode" "<ssescalarmode>")])
1482
;; Compares element 0 of operand 0 with element 0 of operand 1 and sets
;; FLAGS_REG in CCFP mode; output is "%vcomis<suffix>".  The prefix_data16
;; attribute is "1" when the insn mode is DF and "0" otherwise.
1483 (define_insn "<sse>_comi"
1484 [(set (reg:CCFP FLAGS_REG)
1485 (compare:CCFP
1486 (vec_select:MODEF
1487 (match_operand:<ssevecmode> 0 "register_operand" "x")
1488 (parallel [(const_int 0)]))
1489 (vec_select:MODEF
1490 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1491 (parallel [(const_int 0)]))))]
1492 "SSE_FLOAT_MODE_P (<MODE>mode)"
1493 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1494 [(set_attr "type" "ssecomi")
1495 (set_attr "prefix" "maybe_vex")
1496 (set_attr "prefix_rep" "0")
1497 (set (attr "prefix_data16")
1498 (if_then_else (eq_attr "mode" "DF")
1499 (const_string "1")
1500 (const_string "0")))
1501 (set_attr "mode" "<MODE>")])
1502
;; Compares element 0 of operand 0 with element 0 of operand 1 and sets
;; FLAGS_REG in CCFPU mode; output is "%vucomis<suffix>".  The
;; prefix_data16 attribute is "1" when the insn mode is DF and "0"
;; otherwise.
1503 (define_insn "<sse>_ucomi"
1504 [(set (reg:CCFPU FLAGS_REG)
1505 (compare:CCFPU
1506 (vec_select:MODEF
1507 (match_operand:<ssevecmode> 0 "register_operand" "x")
1508 (parallel [(const_int 0)]))
1509 (vec_select:MODEF
1510 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1511 (parallel [(const_int 0)]))))]
1512 "SSE_FLOAT_MODE_P (<MODE>mode)"
1513 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1514 [(set_attr "type" "ssecomi")
1515 (set_attr "prefix" "maybe_vex")
1516 (set_attr "prefix_rep" "0")
1517 (set (attr "prefix_data16")
1518 (if_then_else (eq_attr "mode" "DF")
1519 (const_string "1")
1520 (const_string "0")))
1521 (set_attr "mode" "<MODE>")])
1522
;; Expander for the floating-point vector conditional: operand 0 receives
;; operand 1 or operand 2 depending on the comparison (operator operand 3)
;; of operands 4 and 5.  All the work is done by ix86_expand_fp_vcond; the
;; expander asserts that it succeeded and ends with DONE.
1523 (define_expand "vcond<mode>"
1524 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1525 (if_then_else:SSEMODEF2P
1526 (match_operator 3 ""
1527 [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
1528 (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
1529 (match_operand:SSEMODEF2P 1 "general_operand" "")
1530 (match_operand:SSEMODEF2P 2 "general_operand" "")))]
1531 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1532 {
1533 bool ok = ix86_expand_fp_vcond (operands);
1534 gcc_assert (ok);
1535 DONE;
1536 })
1537
1538 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1539 ;;
1540 ;; Parallel floating point logical operations
1541 ;;
1542 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1543
;; AVX vector and-not: (not operand 1) AND operand 2, output as the
;; three-operand vandnp<suffix>.
1544 (define_insn "avx_andnot<mode>3"
1545 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1546 (and:AVXMODEF2P
1547 (not:AVXMODEF2P
1548 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1549 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1550 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1551 "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1552 [(set_attr "type" "sselog")
1553 (set_attr "prefix" "vex")
1554 (set_attr "mode" "<avxvecmode>")])
1555
;; SSE vector and-not: (not operand 1) AND operand 2, with operand 1 tied
;; to the destination through "0"; output as the two-operand andnp<suffix>.
1556 (define_insn "<sse>_andnot<mode>3"
1557 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1558 (and:SSEMODEF2P
1559 (not:SSEMODEF2P
1560 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1561 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1562 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1563 "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1564 [(set_attr "type" "sselog")
1565 (set_attr "mode" "<MODE>")])
1566
;; Expander for the plogic operations on the AVX256MODEF2P modes.  The
;; only preparation is a call to ix86_fixup_binary_operands_no_copy.
1567 (define_expand "<code><mode>3"
1568 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1569 (plogic:AVX256MODEF2P
1570 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1571 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1572 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1573 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1574
;; AVX vector plogic operation.  Operand 1 carries the "%" commutative
;; marker; the insn matches only when ix86_binary_operator_ok accepts the
;; operands.
1575 (define_insn "*avx_<code><mode>3"
1576 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1577 (plogic:AVXMODEF2P
1578 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1579 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1580 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1581 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1582 "v<plogicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1583 [(set_attr "type" "sselog")
1584 (set_attr "prefix" "vex")
1585 (set_attr "mode" "<avxvecmode>")])
1586
;; Expander for the plogic operations on the SSEMODEF2P modes.  The only
;; preparation is a call to ix86_fixup_binary_operands_no_copy.
1587 (define_expand "<code><mode>3"
1588 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1589 (plogic:SSEMODEF2P
1590 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1591 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1592 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1593 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1594
;; SSE vector plogic operation.  Operand 1 is tied to the destination
;; ("0") and carries the "%" commutative marker; the insn matches only when
;; ix86_binary_operator_ok accepts the operands.
1595 (define_insn "*<code><mode>3"
1596 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1597 (plogic:SSEMODEF2P
1598 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1599 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1600 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1601 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1602 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1603 [(set_attr "type" "sselog")
1604 (set_attr "mode" "<MODE>")])
1605
;; Expander for vector copysign, emitted as three sets:
;;   operand 4 = (not operand 3) AND operand 1
;;   operand 5 = operand 3 AND operand 2
;;   operand 0 = operand 4 IOR operand 5
;; The C body sets operand 3 to the result of ix86_build_signbit_mask for
;; the scalar element mode (presumably the sign-bit mask, going by the
;; helper's name -- its definition is not in this file section) and
;; creates fresh pseudo registers for operands 4 and 5.
1606 (define_expand "copysign<mode>3"
1607 [(set (match_dup 4)
1608 (and:SSEMODEF2P
1609 (not:SSEMODEF2P (match_dup 3))
1610 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
1611 (set (match_dup 5)
1612 (and:SSEMODEF2P (match_dup 3)
1613 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
1614 (set (match_operand:SSEMODEF2P 0 "register_operand" "")
1615 (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
1616 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1617 {
1618 operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
1619
1620 operands[4] = gen_reg_rtx (<MODE>mode);
1621 operands[5] = gen_reg_rtx (<MODE>mode);
1622 })
1623
1624 ;; Also define scalar versions. These are used for abs, neg, and
1625 ;; conditional move. Using subregs into vector modes causes register
1626 ;; allocation lossage. These patterns do not allow memory operands
1627 ;; because the native instructions read the full 128-bits.
1628
;; Scalar (MODEF) and-not for AVX: (not operand 1) AND operand 2.  All
;; operands are registers; the insn's "mode" attribute is the containing
;; vector mode (<ssevecmode>).
1629 (define_insn "*avx_andnot<mode>3"
1630 [(set (match_operand:MODEF 0 "register_operand" "=x")
1631 (and:MODEF
1632 (not:MODEF
1633 (match_operand:MODEF 1 "register_operand" "x"))
1634 (match_operand:MODEF 2 "register_operand" "x")))]
1635 "AVX_FLOAT_MODE_P (<MODE>mode)"
1636 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1637 [(set_attr "type" "sselog")
1638 (set_attr "prefix" "vex")
1639 (set_attr "mode" "<ssevecmode>")])
1640
;; Scalar (MODEF) and-not for SSE: (not operand 1) AND operand 2, with
;; operand 1 tied to the destination through "0".  All operands are
;; registers; the "mode" attribute is the containing vector mode.
1641 (define_insn "*andnot<mode>3"
1642 [(set (match_operand:MODEF 0 "register_operand" "=x")
1643 (and:MODEF
1644 (not:MODEF
1645 (match_operand:MODEF 1 "register_operand" "0"))
1646 (match_operand:MODEF 2 "register_operand" "x")))]
1647 "SSE_FLOAT_MODE_P (<MODE>mode)"
1648 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1649 [(set_attr "type" "sselog")
1650 (set_attr "mode" "<ssevecmode>")])
1651
;; Scalar (MODEF) plogic operation for AVX.  All operands are registers;
;; output is the three-operand v<plogicprefix>p<suffix> and the "mode"
;; attribute is the containing vector mode.
1652 (define_insn "*avx_<code><mode>3"
1653 [(set (match_operand:MODEF 0 "register_operand" "=x")
1654 (plogic:MODEF
1655 (match_operand:MODEF 1 "register_operand" "x")
1656 (match_operand:MODEF 2 "register_operand" "x")))]
1657 "AVX_FLOAT_MODE_P (<MODE>mode)"
1658 "v<plogicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1659 [(set_attr "type" "sselog")
1660 (set_attr "prefix" "vex")
1661 (set_attr "mode" "<ssevecmode>")])
1662
;; Scalar (MODEF) plogic operation for SSE, with operand 1 tied to the
;; destination through "0".  All operands are registers; the "mode"
;; attribute is the containing vector mode.
1663 (define_insn "*<code><mode>3"
1664 [(set (match_operand:MODEF 0 "register_operand" "=x")
1665 (plogic:MODEF
1666 (match_operand:MODEF 1 "register_operand" "0")
1667 (match_operand:MODEF 2 "register_operand" "x")))]
1668 "SSE_FLOAT_MODE_P (<MODE>mode)"
1669 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
1670 [(set_attr "type" "sselog")
1671 (set_attr "mode" "<ssevecmode>")])
1672
1673 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1674 ;;
1675 ;; FMA4 floating point multiply/accumulate instructions.  This includes the
1676 ;; scalar versions of the instructions as well as the vector versions.
1677 ;;
1678 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1679
1680 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1681 ;; combine to generate a multiply/add with two memory references. We then
1682 ;; split this insn, into loading up the destination register with one of the
1683 ;; memory operations. If we don't manage to split the insn, reload will
1684 ;; generate the appropriate moves. The reason this is needed, is that combine
1685 ;; has already folded one of the memory references into both the multiply and
1686 ;; add insns, and it can't generate a new pseudo. I.e.:
1687 ;; (set (reg1) (mem (addr1)))
1688 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1689 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1690
;; FMA4 multiply-add: operand 0 = operand 1 * operand 2 + operand 3.
;; The three constraint alternatives each allow a different one of the
;; source operands to be in memory.  The insn condition calls
;; ix86_fma4_valid_op_p with 2 as its fifth argument, where the splitter
;; for this shape uses 1 (presumably the number of memory operands allowed
;; -- confirm against the helper's definition).
1691 (define_insn "fma4_fmadd<mode>4256"
1692 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x,x")
1693 (plus:FMA4MODEF4
1694 (mult:FMA4MODEF4
1695 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x,xm")
1696 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm,x"))
1697 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x,x")))]
1698 "TARGET_FMA4
1699 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1700 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1701 [(set_attr "type" "ssemuladd")
1702 (set_attr "mode" "<MODE>")])
1703
1704 ;; Split fmadd with two memory operands into a load and the fmadd.
;; Applies when the operands fail ix86_fma4_valid_op_p with fifth argument
;; 1 but pass it with fifth argument 2, and operand 0 is not mentioned in
;; any source operand.  It calls ix86_expand_fma4_multiple_memory on the
;; operands and then re-emits the multiply-add through
;; gen_fma4_fmadd<mode>4256.
1705 (define_split
1706 [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
1707 (plus:FMA4MODEF4
1708 (mult:FMA4MODEF4
1709 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "")
1710 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))
1711 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))]
1712 "TARGET_FMA4
1713 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1714 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1715 && !reg_mentioned_p (operands[0], operands[1])
1716 && !reg_mentioned_p (operands[0], operands[2])
1717 && !reg_mentioned_p (operands[0], operands[3])"
1718 [(const_int 0)]
1719 {
1720 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1721 emit_insn (gen_fma4_fmadd<mode>4256 (operands[0], operands[1],
1722 operands[2], operands[3]));
1723 DONE;
1724 })
1725
1726 ;; Floating multiply and subtract
1727 ;; Allow two memory operands the same as fmadd
;; Computes op0 = (op1 * op2) - op3 over the FMA4MODEF4 modes.  The three
;; constraint alternatives let operand 3, 2 or 1 (in that order) be the
;; memory operand; the insn condition passes 2 as the fifth argument of
;; ix86_fma4_valid_op_p.
1728 (define_insn "fma4_fmsub<mode>4256"
1729 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x,x")
1730 (minus:FMA4MODEF4
1731 (mult:FMA4MODEF4
1732 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x,xm")
1733 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm,x"))
1734 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x,x")))]
1735 "TARGET_FMA4
1736 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1737 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1738 [(set_attr "type" "ssemuladd")
1739 (set_attr "mode" "<MODE>")])
1740
1741 ;; Split fmsub with two memory operands into a load and the fmsub.
;; Matches (op1 * op2) - op3 only when ix86_fma4_valid_op_p rejects the
;; operands with 1 as its fifth argument but accepts them with 2, and
;; operand 0 does not appear in operands 1-3.  After
;; ix86_expand_fma4_multiple_memory has processed the operands, the insn is
;; re-emitted through gen_fma4_fmsub<mode>4256.
1742 (define_split
1743 [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
1744 (minus:FMA4MODEF4
1745 (mult:FMA4MODEF4
1746 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "")
1747 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))
1748 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))]
1749 "TARGET_FMA4
1750 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1751 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1752 && !reg_mentioned_p (operands[0], operands[1])
1753 && !reg_mentioned_p (operands[0], operands[2])
1754 && !reg_mentioned_p (operands[0], operands[3])"
1755 [(const_int 0)]
1756 {
1757 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1758 emit_insn (gen_fma4_fmsub<mode>4256 (operands[0], operands[1],
1759 operands[2], operands[3]));
1760 DONE;
1761 })
1762
1763 ;; Floating point negative multiply and add
1764 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1765 ;; Note operands are out of order to simplify call to ix86_fma4_valid_op_p
1766 ;; Allow two memory operands to help in optimizing.
;; Computes op0 = op3 - (op1 * op2) over the FMA4MODEF4 modes; operand 3
;; appears first in the RTL but is still printed as %3 in the template.
1767 (define_insn "fma4_fnmadd<mode>4256"
1768 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x,x")
1769 (minus:FMA4MODEF4
1770 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x,x")
1771 (mult:FMA4MODEF4
1772 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x,xm")
1773 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm,x"))))]
1774 "TARGET_FMA4
1775 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1776 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1777 [(set_attr "type" "ssemuladd")
1778 (set_attr "mode" "<MODE>")])
1779
1780 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; Matches op3 - (op1 * op2) only when ix86_fma4_valid_op_p rejects the
;; operands with 1 as its fifth argument but accepts them with 2, and
;; operand 0 does not appear in operands 1-3.  After
;; ix86_expand_fma4_multiple_memory has processed the operands, the insn is
;; re-emitted through gen_fma4_fnmadd<mode>4256.
1781 (define_split
1782 [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
1783 (minus:FMA4MODEF4
1784 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")
1785 (mult:FMA4MODEF4
1786 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "")
1787 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))))]
1788 "TARGET_FMA4
1789 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1790 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1791 && !reg_mentioned_p (operands[0], operands[1])
1792 && !reg_mentioned_p (operands[0], operands[2])
1793 && !reg_mentioned_p (operands[0], operands[3])"
1794 [(const_int 0)]
1795 {
1796 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1797 emit_insn (gen_fma4_fnmadd<mode>4256 (operands[0], operands[1],
1798 operands[2], operands[3]));
1799 DONE;
1800 })
1801
1802 ;; Floating point negative multiply and subtract
1803 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1804 ;; Allow 2 memory operands to help with optimization
;; Computes op0 = ((-op1) * op2) - op3 over the FMA4MODEF4 modes.  Only two
;; constraint alternatives exist: memory is allowed for operand 3 or for
;; operand 2, while operand 1 is register-only ("x,x").  The last argument
;; of ix86_fma4_valid_op_p is false here, unlike the fmadd/fmsub/fnmadd
;; patterns.
1805 (define_insn "fma4_fnmsub<mode>4256"
1806 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1807 (minus:FMA4MODEF4
1808 (mult:FMA4MODEF4
1809 (neg:FMA4MODEF4
1810 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x"))
1811 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm"))
1812 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1813 "TARGET_FMA4
1814 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)"
1815 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1816 [(set_attr "type" "ssemuladd")
1817 (set_attr "mode" "<MODE>")])
1818
1819 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; Matches ((-op1) * op2) - op3 only when ix86_fma4_valid_op_p rejects the
;; operands with 1 as its fifth argument but accepts them with 2, and
;; operand 0 does not appear in operands 1-3.  After
;; ix86_expand_fma4_multiple_memory has processed the operands, the insn is
;; re-emitted through gen_fma4_fnmsub<mode>4256.
1820 (define_split
1821 [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
1822 (minus:FMA4MODEF4
1823 (mult:FMA4MODEF4
1824 (neg:FMA4MODEF4
1825 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" ""))
1826 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))
1827 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))]
1828 "TARGET_FMA4
1829 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)
1830 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)
1831 && !reg_mentioned_p (operands[0], operands[1])
1832 && !reg_mentioned_p (operands[0], operands[2])
1833 && !reg_mentioned_p (operands[0], operands[3])"
1834 [(const_int 0)]
1835 {
1836 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1837 emit_insn (gen_fma4_fnmsub<mode>4256 (operands[0], operands[1],
1838 operands[2], operands[3]));
1839 DONE;
1840 })
1841
1842 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Fused multiply-add over the SSEMODEF4 modes: op0 = (op1 * op2) + op3.
;; The three constraint alternatives let operand 3, 2 or 1 (in that order)
;; be the memory operand; the insn condition passes 2 as the fifth argument
;; of ix86_fma4_valid_op_p.
;; NOTE(review): SSEMODEF4 is defined outside this excerpt -- confirm which
;; modes it covers.
1843 (define_insn "fma4_fmadd<mode>4"
1844 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x")
1845 (plus:SSEMODEF4
1846 (mult:SSEMODEF4
1847 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x,xm")
1848 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,x"))
1849 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,x")))]
1850 "TARGET_FMA4
1851 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1852 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1853 [(set_attr "type" "ssemuladd")
1854 (set_attr "mode" "<MODE>")])
1855
1856 ;; Split fmadd with two memory operands into a load and the fmadd.
;; SSEMODEF4 counterpart of the split: it fires only when
;; ix86_fma4_valid_op_p rejects the operands with 1 as its fifth argument
;; but accepts them with 2, and operand 0 does not appear in operands 1-3.
;; ix86_expand_fma4_multiple_memory is called on the operands, then the insn
;; is re-emitted through gen_fma4_fmadd<mode>4.
1857 (define_split
1858 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1859 (plus:SSEMODEF4
1860 (mult:SSEMODEF4
1861 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1862 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1863 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1864 "TARGET_FMA4
1865 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1866 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1867 && !reg_mentioned_p (operands[0], operands[1])
1868 && !reg_mentioned_p (operands[0], operands[2])
1869 && !reg_mentioned_p (operands[0], operands[3])"
1870 [(const_int 0)]
1871 {
1872 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1873 emit_insn (gen_fma4_fmadd<mode>4 (operands[0], operands[1],
1874 operands[2], operands[3]));
1875 DONE;
1876 })
1877
1878 ;; For the scalar operations, use operand1 for the upper words that aren't
1879 ;; modified, so restrict the forms that are generated.
1880 ;; Scalar version of fmadd
;; The vec_merge mask of 1 takes element 0 from (op1 * op2) + op3 and every
;; other element from (match_dup 0).  Operand 1 is register-only by
;; constraint; memory is allowed for operand 3 or operand 2.
;; NOTE(review): the comment above names operand1 as the source of the
;; unmodified elements, but the RTL merges with operand 0 -- confirm.
1881 (define_insn "fma4_vmfmadd<mode>4"
1882 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1883 (vec_merge:SSEMODEF2P
1884 (plus:SSEMODEF2P
1885 (mult:SSEMODEF2P
1886 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
1887 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1888 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1889 (match_dup 0)
1890 (const_int 1)))]
1891 "TARGET_FMA4
1892 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
1893 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1894 [(set_attr "type" "ssemuladd")
1895 (set_attr "mode" "<MODE>")])
1896
1897 ;; Floating multiply and subtract
1898 ;; Allow two memory operands the same as fmadd
;; Computes op0 = (op1 * op2) - op3 over the SSEMODEF4 modes.  The three
;; constraint alternatives let operand 3, 2 or 1 (in that order) be the
;; memory operand; the insn condition passes 2 as the fifth argument of
;; ix86_fma4_valid_op_p.
1899 (define_insn "fma4_fmsub<mode>4"
1900 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x")
1901 (minus:SSEMODEF4
1902 (mult:SSEMODEF4
1903 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x,xm")
1904 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,x"))
1905 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,x")))]
1906 "TARGET_FMA4
1907 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1908 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1909 [(set_attr "type" "ssemuladd")
1910 (set_attr "mode" "<MODE>")])
1911
1912 ;; Split fmsub with two memory operands into a load and the fmsub.
;; SSEMODEF4 counterpart of the split: it fires only when
;; ix86_fma4_valid_op_p rejects the operands with 1 as its fifth argument
;; but accepts them with 2, and operand 0 does not appear in operands 1-3.
;; ix86_expand_fma4_multiple_memory is called on the operands, then the insn
;; is re-emitted through gen_fma4_fmsub<mode>4.
1913 (define_split
1914 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1915 (minus:SSEMODEF4
1916 (mult:SSEMODEF4
1917 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1918 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1919 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1920 "TARGET_FMA4
1921 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1922 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1923 && !reg_mentioned_p (operands[0], operands[1])
1924 && !reg_mentioned_p (operands[0], operands[2])
1925 && !reg_mentioned_p (operands[0], operands[3])"
1926 [(const_int 0)]
1927 {
1928 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1929 emit_insn (gen_fma4_fmsub<mode>4 (operands[0], operands[1],
1930 operands[2], operands[3]));
1931 DONE;
1932 })
1933
1934 ;; For the scalar operations, use operand1 for the upper words that aren't
1935 ;; modified, so restrict the forms that are generated.
1936 ;; Scalar version of fmsub
;; The vec_merge mask of 1 takes element 0 from (op1 * op2) - op3 and every
;; other element from (match_dup 0).  Operand 1 is register-only by
;; constraint; memory is allowed for operand 3 or operand 2.
;; NOTE(review): the comment above names operand1 as the source of the
;; unmodified elements, but the RTL merges with operand 0 -- confirm.
1937 (define_insn "fma4_vmfmsub<mode>4"
1938 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1939 (vec_merge:SSEMODEF2P
1940 (minus:SSEMODEF2P
1941 (mult:SSEMODEF2P
1942 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
1943 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
1944 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1945 (match_dup 0)
1946 (const_int 1)))]
1947 "TARGET_FMA4
1948 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
1949 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1950 [(set_attr "type" "ssemuladd")
1951 (set_attr "mode" "<MODE>")])
1952
1953 ;; Floating point negative multiply and add
1954 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
1955 ;; Note operands are out of order to simplify call to ix86_fma4_valid_op_p
1956 ;; Allow two memory operands to help in optimizing.
;; Computes op0 = op3 - (op1 * op2) over the SSEMODEF4 modes; operand 3
;; appears first in the RTL but is still printed as %3 in the template.
1957 (define_insn "fma4_fnmadd<mode>4"
1958 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x")
1959 (minus:SSEMODEF4
1960 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,x")
1961 (mult:SSEMODEF4
1962 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x,xm")
1963 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,x"))))]
1964 "TARGET_FMA4
1965 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
1966 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1967 [(set_attr "type" "ssemuladd")
1968 (set_attr "mode" "<MODE>")])
1969
1970 ;; Split fnmadd with two memory operands into a load and the fnmadd.
;; SSEMODEF4 counterpart of the split: it fires only when
;; ix86_fma4_valid_op_p rejects the operands with 1 as its fifth argument
;; but accepts them with 2, and operand 0 does not appear in operands 1-3.
;; ix86_expand_fma4_multiple_memory is called on the operands, then the insn
;; is re-emitted through gen_fma4_fnmadd<mode>4.
1971 (define_split
1972 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1973 (minus:SSEMODEF4
1974 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1975 (mult:SSEMODEF4
1976 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1977 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1978 "TARGET_FMA4
1979 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
1980 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
1981 && !reg_mentioned_p (operands[0], operands[1])
1982 && !reg_mentioned_p (operands[0], operands[2])
1983 && !reg_mentioned_p (operands[0], operands[3])"
1984 [(const_int 0)]
1985 {
1986 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
1987 emit_insn (gen_fma4_fnmadd<mode>4 (operands[0], operands[1],
1988 operands[2], operands[3]));
1989 DONE;
1990 })
1991
1992 ;; For the scalar operations, use operand1 for the upper words that aren't
1993 ;; modified, so restrict the forms that are generated.
1994 ;; Scalar version of fnmadd
;; The vec_merge mask of 1 takes element 0 from op3 - (op1 * op2) and every
;; other element from (match_dup 0).  Operand 1 is register-only by
;; constraint; memory is allowed for operand 3 or operand 2.
;; NOTE(review): the comment above names operand1 as the source of the
;; unmodified elements, but the RTL merges with operand 0 -- confirm.
1995 (define_insn "fma4_vmfnmadd<mode>4"
1996 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1997 (vec_merge:SSEMODEF2P
1998 (minus:SSEMODEF2P
1999 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2000 (mult:SSEMODEF2P
2001 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
2002 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
2003 (match_dup 0)
2004 (const_int 1)))]
2005 "TARGET_FMA4
2006 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2007 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2008 [(set_attr "type" "ssemuladd")
2009 (set_attr "mode" "<MODE>")])
2010
2011 ;; Floating point negative multiply and subtract
2012 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
2013 ;; Allow 2 memory operands to help with optimization
;; Computes op0 = ((-op1) * op2) - op3 over the SSEMODEF4 modes.  Only two
;; constraint alternatives exist: memory is allowed for operand 3 or for
;; operand 2, while operand 1 is register-only ("x,x").  The last argument
;; of ix86_fma4_valid_op_p is false here, unlike the fmadd/fmsub/fnmadd
;; patterns.
2014 (define_insn "fma4_fnmsub<mode>4"
2015 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
2016 (minus:SSEMODEF4
2017 (mult:SSEMODEF4
2018 (neg:SSEMODEF4
2019 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x"))
2020 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
2021 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
2022 "TARGET_FMA4
2023 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)"
2024 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2025 [(set_attr "type" "ssemuladd")
2026 (set_attr "mode" "<MODE>")])
2027
2028 ;; Split fnmsub with two memory operands into a load and the fnmsub.
;; SSEMODEF4 counterpart of the split: it fires only when
;; ix86_fma4_valid_op_p rejects the operands with 1 as its fifth argument
;; but accepts them with 2, and operand 0 does not appear in operands 1-3.
;; ix86_expand_fma4_multiple_memory is called on the operands, then the insn
;; is re-emitted through gen_fma4_fnmsub<mode>4.
2029 (define_split
2030 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
2031 (minus:SSEMODEF4
2032 (mult:SSEMODEF4
2033 (neg:SSEMODEF4
2034 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
2035 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
2036 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
2037 "TARGET_FMA4
2038 && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)
2039 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)
2040 && !reg_mentioned_p (operands[0], operands[1])
2041 && !reg_mentioned_p (operands[0], operands[2])
2042 && !reg_mentioned_p (operands[0], operands[3])"
2043 [(const_int 0)]
2044 {
2045 ix86_expand_fma4_multiple_memory (operands, 4, <MODE>mode);
2046 emit_insn (gen_fma4_fnmsub<mode>4 (operands[0], operands[1],
2047 operands[2], operands[3]));
2048 DONE;
2049 })
2050
2051 ;; For the scalar operations, use operand1 for the upper words that aren't
2052 ;; modified, so restrict the forms that are generated.
2053 ;; Scalar version of fnmsub
;; The vec_merge mask of 1 takes element 0 from ((-op1) * op2) - op3 and
;; every other element from (match_dup 0).  Operand 1 is register-only by
;; constraint; memory is allowed for operand 3 or operand 2.
;; NOTE(review): the comment above names operand1 as the source of the
;; unmodified elements, but the RTL merges with operand 0 -- confirm.
;; NOTE(review): the condition passes 2 as the fifth argument of
;; ix86_fma4_valid_op_p, whereas the other scalar fma4_vm* patterns pass 1
;; and no define_split exists for the scalar forms -- confirm this is
;; intended.
2054 (define_insn "fma4_vmfnmsub<mode>4"
2055 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2056 (vec_merge:SSEMODEF2P
2057 (minus:SSEMODEF2P
2058 (mult:SSEMODEF2P
2059 (neg:SSEMODEF2P
2060 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x"))
2061 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2062 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2063 (match_dup 0)
2064 (const_int 1)))]
2065 "TARGET_FMA4
2066 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)"
2067 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2068 [(set_attr "type" "ssemuladd")
2069 (set_attr "mode" "<MODE>")])
2070
2071 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2072
;; (op1 * op2) + op3 over the FMA4MODEF4 modes, wrapped in an unspec tagged
;; UNSPEC_FMA4_INTRINSIC.  Two constraint alternatives: memory is allowed
;; for operand 3 or operand 2; operand 1 is register-only.  The condition
;; passes 1 as the fifth argument of ix86_fma4_valid_op_p.
;; NOTE(review): presumably the variant used by the FMA4 builtins -- confirm.
2073 (define_insn "fma4i_fmadd<mode>4256"
2074 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
2075 (unspec:FMA4MODEF4
2076 [(plus:FMA4MODEF4
2077 (mult:FMA4MODEF4
2078 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x")
2079 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm"))
2080 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
2081 UNSPEC_FMA4_INTRINSIC))]
2082 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2083 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2084 [(set_attr "type" "ssemuladd")
2085 (set_attr "mode" "<MODE>")])
2086
;; (op1 * op2) - op3 over the FMA4MODEF4 modes, wrapped in an unspec tagged
;; UNSPEC_FMA4_INTRINSIC.  Two constraint alternatives: memory is allowed
;; for operand 3 or operand 2; operand 1 is register-only.  The condition
;; passes 1 as the fifth argument of ix86_fma4_valid_op_p.
2087 (define_insn "fma4i_fmsub<mode>4256"
2088 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
2089 (unspec:FMA4MODEF4
2090 [(minus:FMA4MODEF4
2091 (mult:FMA4MODEF4
2092 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x")
2093 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm"))
2094 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
2095 UNSPEC_FMA4_INTRINSIC))]
2096 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2097 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2098 [(set_attr "type" "ssemuladd")
2099 (set_attr "mode" "<MODE>")])
2100
;; op3 - (op1 * op2) over the FMA4MODEF4 modes, wrapped in an unspec tagged
;; UNSPEC_FMA4_INTRINSIC.  Two constraint alternatives: memory is allowed
;; for operand 3 or operand 2; operand 1 is register-only.  The condition
;; passes 1 as the fifth argument of ix86_fma4_valid_op_p.
2101 (define_insn "fma4i_fnmadd<mode>4256"
2102 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
2103 (unspec:FMA4MODEF4
2104 [(minus:FMA4MODEF4
2105 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
2106 (mult:FMA4MODEF4
2107 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x")
2108 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm")))]
2109 UNSPEC_FMA4_INTRINSIC))]
2110 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2111 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2112 [(set_attr "type" "ssemuladd")
2113 (set_attr "mode" "<MODE>")])
2114
;; ((-op1) * op2) - op3 over the FMA4MODEF4 modes, wrapped in an unspec
;; tagged UNSPEC_FMA4_INTRINSIC.  Two constraint alternatives: memory is
;; allowed for operand 3 or operand 2; operand 1 is register-only.  The
;; condition passes 1 as the fifth and false as the last argument of
;; ix86_fma4_valid_op_p.
2115 (define_insn "fma4i_fnmsub<mode>4256"
2116 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
2117 (unspec:FMA4MODEF4
2118 [(minus:FMA4MODEF4
2119 (mult:FMA4MODEF4
2120 (neg:FMA4MODEF4
2121 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x"))
2122 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm"))
2123 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
2124 UNSPEC_FMA4_INTRINSIC))]
2125 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
2126 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2127 [(set_attr "type" "ssemuladd")
2128 (set_attr "mode" "<MODE>")])
2129 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2130
;; (op1 * op2) + op3 over the SSEMODEF2P modes, wrapped in an unspec tagged
;; UNSPEC_FMA4_INTRINSIC.  Two constraint alternatives: memory is allowed
;; for operand 3 or operand 2; operand 1 is register-only.  The condition
;; passes 1 as the fifth argument of ix86_fma4_valid_op_p.
;; NOTE(review): presumably the variant used by the FMA4 builtins -- confirm.
2131 (define_insn "fma4i_fmadd<mode>4"
2132 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2133 (unspec:SSEMODEF2P
2134 [(plus:SSEMODEF2P
2135 (mult:SSEMODEF2P
2136 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
2137 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2138 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2139 UNSPEC_FMA4_INTRINSIC))]
2140 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2141 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2142 [(set_attr "type" "ssemuladd")
2143 (set_attr "mode" "<MODE>")])
2144
;; (op1 * op2) - op3 over the SSEMODEF2P modes, wrapped in an unspec tagged
;; UNSPEC_FMA4_INTRINSIC.  Two constraint alternatives: memory is allowed
;; for operand 3 or operand 2; operand 1 is register-only.  The condition
;; passes 1 as the fifth argument of ix86_fma4_valid_op_p.
2145 (define_insn "fma4i_fmsub<mode>4"
2146 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2147 (unspec:SSEMODEF2P
2148 [(minus:SSEMODEF2P
2149 (mult:SSEMODEF2P
2150 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
2151 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2152 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2153 UNSPEC_FMA4_INTRINSIC))]
2154 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2155 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2156 [(set_attr "type" "ssemuladd")
2157 (set_attr "mode" "<MODE>")])
2158
;; op3 - (op1 * op2) over the SSEMODEF2P modes, wrapped in an unspec tagged
;; UNSPEC_FMA4_INTRINSIC.  Two constraint alternatives: memory is allowed
;; for operand 3 or operand 2; operand 1 is register-only.  The condition
;; passes 1 as the fifth argument of ix86_fma4_valid_op_p.
2159 (define_insn "fma4i_fnmadd<mode>4"
2160 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2161 (unspec:SSEMODEF2P
2162 [(minus:SSEMODEF2P
2163 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2164 (mult:SSEMODEF2P
2165 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")
2166 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))]
2167 UNSPEC_FMA4_INTRINSIC))]
2168 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2169 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2170 [(set_attr "type" "ssemuladd")
2171 (set_attr "mode" "<MODE>")])
2172
;; ((-op1) * op2) - op3 over the SSEMODEF2P modes, wrapped in an unspec
;; tagged UNSPEC_FMA4_INTRINSIC.  Two constraint alternatives: memory is
;; allowed for operand 3 or operand 2; operand 1 is register-only.  The
;; condition passes 1 as the fifth and false as the last argument of
;; ix86_fma4_valid_op_p.
2173 (define_insn "fma4i_fnmsub<mode>4"
2174 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2175 (unspec:SSEMODEF2P
2176 [(minus:SSEMODEF2P
2177 (mult:SSEMODEF2P
2178 (neg:SSEMODEF2P
2179 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x"))
2180 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2181 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2182 UNSPEC_FMA4_INTRINSIC))]
2183 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
2184 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2185 [(set_attr "type" "ssemuladd")
2186 (set_attr "mode" "<MODE>")])
2187
2188 ;; For the scalar operations, use operand1 for the upper words that aren't
2189 ;; modified, so restrict the forms that are accepted.
;; Scalar fmadd inside an UNSPEC_FMA4_INTRINSIC unspec: the vec_merge mask of
;; 1 takes element 0 from (op1 * op2) + op3 and every other element from
;; (match_dup 0).  Operand 1 must satisfy register_operand; the "mode"
;; attribute is the scalar mode <ssescalarmode>.
;; NOTE(review): the comment above names operand1 as the source of the
;; unmodified elements, but the RTL merges with operand 0 -- confirm.
2190 (define_insn "fma4i_vmfmadd<mode>4"
2191 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2192 (unspec:SSEMODEF2P
2193 [(vec_merge:SSEMODEF2P
2194 (plus:SSEMODEF2P
2195 (mult:SSEMODEF2P
2196 (match_operand:SSEMODEF2P 1 "register_operand" "x,x")
2197 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2198 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2199 (match_dup 0)
2200 (const_int 1))]
2201 UNSPEC_FMA4_INTRINSIC))]
2202 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
2203 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2204 [(set_attr "type" "ssemuladd")
2205 (set_attr "mode" "<ssescalarmode>")])
2206
;; Scalar fmsub inside an UNSPEC_FMA4_INTRINSIC unspec: the vec_merge mask of
;; 1 takes element 0 from (op1 * op2) - op3 and every other element from
;; (match_dup 0).  Operand 1 must satisfy register_operand; memory is
;; allowed for operand 3 or operand 2.
2207 (define_insn "fma4i_vmfmsub<mode>4"
2208 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2209 (unspec:SSEMODEF2P
2210 [(vec_merge:SSEMODEF2P
2211 (minus:SSEMODEF2P
2212 (mult:SSEMODEF2P
2213 (match_operand:SSEMODEF2P 1 "register_operand" "x,x")
2214 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2215 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2216 (match_dup 0)
2217 (const_int 1))]
2218 UNSPEC_FMA4_INTRINSIC))]
2219 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
2220 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2221 [(set_attr "type" "ssemuladd")
2222 (set_attr "mode" "<ssescalarmode>")])
2223
;; Scalar fnmadd inside an UNSPEC_FMA4_INTRINSIC unspec: the vec_merge mask
;; of 1 takes element 0 from op3 - (op1 * op2) and every other element from
;; (match_dup 0).  Memory is allowed for operand 3 or operand 2.
;; Operand 1 uses register_operand, matching its register-only "x,x"
;; constraint and the predicate used by the other fma4i_vm* patterns
;; (fma4i_vmfmadd, fma4i_vmfmsub, fma4i_vmfnmsub).
2224 (define_insn "fma4i_vmfnmadd<mode>4"
2225 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2226 (unspec:SSEMODEF2P
2227 [(vec_merge:SSEMODEF2P
2228 (minus:SSEMODEF2P
2229 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2230 (mult:SSEMODEF2P
2231 (match_operand:SSEMODEF2P 1 "register_operand" "x,x")
2232 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
2233 (match_dup 0)
2234 (const_int 1))]
2235 UNSPEC_FMA4_INTRINSIC))]
2236 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)"
2237 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2238 [(set_attr "type" "ssemuladd")
2239 (set_attr "mode" "<ssescalarmode>")])
2240
;; Scalar fnmsub inside an UNSPEC_FMA4_INTRINSIC unspec: the vec_merge mask
;; of 1 takes element 0 from ((-op1) * op2) - op3 and every other element
;; from (match_dup 0).  Operand 1 must satisfy register_operand; memory is
;; allowed for operand 3 or operand 2.
2241 (define_insn "fma4i_vmfnmsub<mode>4"
2242 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2243 (unspec:SSEMODEF2P
2244 [(vec_merge:SSEMODEF2P
2245 (minus:SSEMODEF2P
2246 (mult:SSEMODEF2P
2247 (neg:SSEMODEF2P
2248 (match_operand:SSEMODEF2P 1 "register_operand" "x,x"))
2249 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
2250 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2251 (match_dup 0)
2252 (const_int 1))]
2253 UNSPEC_FMA4_INTRINSIC))]
2254 "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
2255 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2256 [(set_attr "type" "ssemuladd")
2257 (set_attr "mode" "<ssescalarmode>")])
2258
2259 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2260 ;;
2261 ;; FMA4 Parallel floating point multiply addsub and subadd operations
2262 ;;
2263 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2264
;; V8SF multiply with alternating add/subtract.  The vec_merge mask 170
;; (binary 10101010) takes elements 1,3,5,7 from (op1 * op2) + op3 and
;; elements 0,2,4,6 from (op1 * op2) - op3.
2265 (define_insn "fma4_fmaddsubv8sf4"
2266 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2267 (vec_merge:V8SF
2268 (plus:V8SF
2269 (mult:V8SF
2270 (match_operand:V8SF 1 "nonimmediate_operand" "x,x")
2271 (match_operand:V8SF 2 "nonimmediate_operand" "x,xm"))
2272 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2273 (minus:V8SF
2274 (mult:V8SF
2275 (match_dup 1)
2276 (match_dup 2))
2277 (match_dup 3))
2278 (const_int 170)))]
2279 "TARGET_FMA4
2280 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2281 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2282 [(set_attr "type" "ssemuladd")
2283 (set_attr "mode" "V8SF")])
2284
;; V4DF multiply with alternating add/subtract.  The vec_merge mask 10
;; (binary 1010) takes elements 1,3 from (op1 * op2) + op3 and elements 0,2
;; from (op1 * op2) - op3.
2285 (define_insn "fma4_fmaddsubv4df4"
2286 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2287 (vec_merge:V4DF
2288 (plus:V4DF
2289 (mult:V4DF
2290 (match_operand:V4DF 1 "nonimmediate_operand" "x,x")
2291 (match_operand:V4DF 2 "nonimmediate_operand" "x,xm"))
2292 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2293 (minus:V4DF
2294 (mult:V4DF
2295 (match_dup 1)
2296 (match_dup 2))
2297 (match_dup 3))
2298 (const_int 10)))]
2299 "TARGET_FMA4
2300 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2301 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2302 [(set_attr "type" "ssemuladd")
2303 (set_attr "mode" "V4DF")])
2304
;; V4SF multiply with alternating add/subtract.  The vec_merge mask 10
;; (binary 1010) takes elements 1,3 from (op1 * op2) + op3 and elements 0,2
;; from (op1 * op2) - op3.
2305 (define_insn "fma4_fmaddsubv4sf4"
2306 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2307 (vec_merge:V4SF
2308 (plus:V4SF
2309 (mult:V4SF
2310 (match_operand:V4SF 1 "nonimmediate_operand" "x,x")
2311 (match_operand:V4SF 2 "nonimmediate_operand" "x,xm"))
2312 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2313 (minus:V4SF
2314 (mult:V4SF
2315 (match_dup 1)
2316 (match_dup 2))
2317 (match_dup 3))
2318 (const_int 10)))]
2319 "TARGET_FMA4
2320 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2321 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2322 [(set_attr "type" "ssemuladd")
2323 (set_attr "mode" "V4SF")])
2324
;; V2DF multiply with alternating add/subtract.  The vec_merge mask 2
;; (binary 10) takes element 1 from (op1 * op2) + op3 and element 0 from
;; (op1 * op2) - op3.
2325 (define_insn "fma4_fmaddsubv2df4"
2326 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2327 (vec_merge:V2DF
2328 (plus:V2DF
2329 (mult:V2DF
2330 (match_operand:V2DF 1 "nonimmediate_operand" "x,x")
2331 (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))
2332 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2333 (minus:V2DF
2334 (mult:V2DF
2335 (match_dup 1)
2336 (match_dup 2))
2337 (match_dup 3))
2338 (const_int 2)))]
2339 "TARGET_FMA4
2340 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2341 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2342 [(set_attr "type" "ssemuladd")
2343 (set_attr "mode" "V2DF")])
2344
;; V8SF multiply with alternating subtract/add.  The vec_merge mask 85
;; (binary 01010101) takes elements 0,2,4,6 from (op1 * op2) + op3 and
;; elements 1,3,5,7 from (op1 * op2) - op3.
2345 (define_insn "fma4_fmsubaddv8sf4"
2346 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2347 (vec_merge:V8SF
2348 (plus:V8SF
2349 (mult:V8SF
2350 (match_operand:V8SF 1 "nonimmediate_operand" "x,x")
2351 (match_operand:V8SF 2 "nonimmediate_operand" "x,xm"))
2352 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2353 (minus:V8SF
2354 (mult:V8SF
2355 (match_dup 1)
2356 (match_dup 2))
2357 (match_dup 3))
2358 (const_int 85)))]
2359 "TARGET_FMA4
2360 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2361 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2362 [(set_attr "type" "ssemuladd")
2363 (set_attr "mode" "V8SF")])
2364
;; V4DF multiply with alternating subtract/add.  The vec_merge mask 5
;; (binary 0101) takes elements 0,2 from (op1 * op2) + op3 and elements 1,3
;; from (op1 * op2) - op3.
2365 (define_insn "fma4_fmsubaddv4df4"
2366 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2367 (vec_merge:V4DF
2368 (plus:V4DF
2369 (mult:V4DF
2370 (match_operand:V4DF 1 "nonimmediate_operand" "x,x")
2371 (match_operand:V4DF 2 "nonimmediate_operand" "x,xm"))
2372 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2373 (minus:V4DF
2374 (mult:V4DF
2375 (match_dup 1)
2376 (match_dup 2))
2377 (match_dup 3))
2378 (const_int 5)))]
2379 "TARGET_FMA4
2380 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2381 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2382 [(set_attr "type" "ssemuladd")
2383 (set_attr "mode" "V4DF")])
2384
;; V4SF multiply with alternating subtract/add.  The vec_merge mask 5
;; (binary 0101) takes elements 0,2 from (op1 * op2) + op3 and elements 1,3
;; from (op1 * op2) - op3.
2385 (define_insn "fma4_fmsubaddv4sf4"
2386 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2387 (vec_merge:V4SF
2388 (plus:V4SF
2389 (mult:V4SF
2390 (match_operand:V4SF 1 "nonimmediate_operand" "x,x")
2391 (match_operand:V4SF 2 "nonimmediate_operand" "x,xm"))
2392 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2393 (minus:V4SF
2394 (mult:V4SF
2395 (match_dup 1)
2396 (match_dup 2))
2397 (match_dup 3))
2398 (const_int 5)))]
2399 "TARGET_FMA4
2400 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2401 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2402 [(set_attr "type" "ssemuladd")
2403 (set_attr "mode" "V4SF")])
2404
;; V2DF multiply with alternating subtract/add.  The vec_merge mask 1 takes
;; element 0 from (op1 * op2) + op3 and element 1 from (op1 * op2) - op3.
2405 (define_insn "fma4_fmsubaddv2df4"
2406 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2407 (vec_merge:V2DF
2408 (plus:V2DF
2409 (mult:V2DF
2410 (match_operand:V2DF 1 "nonimmediate_operand" "x,x")
2411 (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))
2412 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2413 (minus:V2DF
2414 (mult:V2DF
2415 (match_dup 1)
2416 (match_dup 2))
2417 (match_dup 3))
2418 (const_int 1)))]
2419 "TARGET_FMA4
2420 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2421 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2422 [(set_attr "type" "ssemuladd")
2423 (set_attr "mode" "V2DF")])
2424
2425 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2426
;; V8SF fmaddsub wrapped in an unspec tagged UNSPEC_FMA4_INTRINSIC.  Mask 170
;; (binary 10101010) takes elements 1,3,5,7 from (op1 * op2) + op3 and
;; elements 0,2,4,6 from (op1 * op2) - op3.
;; NOTE(review): the condition passes 2 as the fifth argument although the
;; fma4i_fmadd/fmsub/fnmadd/fnmsub patterns pass 1 -- confirm.
2427 (define_insn "fma4i_fmaddsubv8sf4"
2428 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2429 (unspec:V8SF
2430 [(vec_merge:V8SF
2431 (plus:V8SF
2432 (mult:V8SF
2433 (match_operand:V8SF 1 "nonimmediate_operand" "x,x")
2434 (match_operand:V8SF 2 "nonimmediate_operand" "x,xm"))
2435 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2436 (minus:V8SF
2437 (mult:V8SF
2438 (match_dup 1)
2439 (match_dup 2))
2440 (match_dup 3))
2441 (const_int 170))]
2442 UNSPEC_FMA4_INTRINSIC))]
2443 "TARGET_FMA4
2444 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2445 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2446 [(set_attr "type" "ssemuladd")
2447 (set_attr "mode" "V8SF")])
2448
;; V4DF fmaddsub wrapped in an unspec tagged UNSPEC_FMA4_INTRINSIC.  Mask 10
;; (binary 1010) takes elements 1,3 from (op1 * op2) + op3 and elements 0,2
;; from (op1 * op2) - op3.
;; NOTE(review): the condition passes 2 as the fifth argument although the
;; fma4i_fmadd/fmsub/fnmadd/fnmsub patterns pass 1 -- confirm.
2449 (define_insn "fma4i_fmaddsubv4df4"
2450 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2451 (unspec:V4DF
2452 [(vec_merge:V4DF
2453 (plus:V4DF
2454 (mult:V4DF
2455 (match_operand:V4DF 1 "nonimmediate_operand" "x,x")
2456 (match_operand:V4DF 2 "nonimmediate_operand" "x,xm"))
2457 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2458 (minus:V4DF
2459 (mult:V4DF
2460 (match_dup 1)
2461 (match_dup 2))
2462 (match_dup 3))
2463 (const_int 10))]
2464 UNSPEC_FMA4_INTRINSIC))]
2465 "TARGET_FMA4
2466 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2467 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2468 [(set_attr "type" "ssemuladd")
2469 (set_attr "mode" "V4DF")])
2470
;; V4SF fmaddsub wrapped in an unspec tagged UNSPEC_FMA4_INTRINSIC.  Mask 10
;; (binary 1010) takes elements 1,3 from (op1 * op2) + op3 and elements 0,2
;; from (op1 * op2) - op3.
;; NOTE(review): the condition passes 2 as the fifth argument although the
;; fma4i_fmadd/fmsub/fnmadd/fnmsub patterns pass 1 -- confirm.
2471 (define_insn "fma4i_fmaddsubv4sf4"
2472 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2473 (unspec:V4SF
2474 [(vec_merge:V4SF
2475 (plus:V4SF
2476 (mult:V4SF
2477 (match_operand:V4SF 1 "nonimmediate_operand" "x,x")
2478 (match_operand:V4SF 2 "nonimmediate_operand" "x,xm"))
2479 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2480 (minus:V4SF
2481 (mult:V4SF
2482 (match_dup 1)
2483 (match_dup 2))
2484 (match_dup 3))
2485 (const_int 10))]
2486 UNSPEC_FMA4_INTRINSIC))]
2487 "TARGET_FMA4
2488 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2489 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2490 [(set_attr "type" "ssemuladd")
2491 (set_attr "mode" "V4SF")])
2492
;; V2DF fmaddsub wrapped in an unspec tagged UNSPEC_FMA4_INTRINSIC.  Mask 2
;; (binary 10) takes element 1 from (op1 * op2) + op3 and element 0 from
;; (op1 * op2) - op3.
;; NOTE(review): the condition passes 2 as the fifth argument although the
;; fma4i_fmadd/fmsub/fnmadd/fnmsub patterns pass 1 -- confirm.
2493 (define_insn "fma4i_fmaddsubv2df4"
2494 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2495 (unspec:V2DF
2496 [(vec_merge:V2DF
2497 (plus:V2DF
2498 (mult:V2DF
2499 (match_operand:V2DF 1 "nonimmediate_operand" "x,x")
2500 (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))
2501 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2502 (minus:V2DF
2503 (mult:V2DF
2504 (match_dup 1)
2505 (match_dup 2))
2506 (match_dup 3))
2507 (const_int 2))]
2508 UNSPEC_FMA4_INTRINSIC))]
2509 "TARGET_FMA4
2510 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2511 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2512 [(set_attr "type" "ssemuladd")
2513 (set_attr "mode" "V2DF")])
2514
;; V8SF fmsubadd wrapped in an unspec tagged UNSPEC_FMA4_INTRINSIC.  Mask 85
;; (binary 01010101) takes elements 0,2,4,6 from (op1 * op2) + op3 and
;; elements 1,3,5,7 from (op1 * op2) - op3.
;; NOTE(review): the condition passes 2 as the fifth argument although the
;; fma4i_fmadd/fmsub/fnmadd/fnmsub patterns pass 1 -- confirm.
2515 (define_insn "fma4i_fmsubaddv8sf4"
2516 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2517 (unspec:V8SF
2518 [(vec_merge:V8SF
2519 (plus:V8SF
2520 (mult:V8SF
2521 (match_operand:V8SF 1 "nonimmediate_operand" "x,x")
2522 (match_operand:V8SF 2 "nonimmediate_operand" "x,xm"))
2523 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2524 (minus:V8SF
2525 (mult:V8SF
2526 (match_dup 1)
2527 (match_dup 2))
2528 (match_dup 3))
2529 (const_int 85))]
2530 UNSPEC_FMA4_INTRINSIC))]
2531 "TARGET_FMA4
2532 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2533 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2534 [(set_attr "type" "ssemuladd")
2535 (set_attr "mode" "V8SF")])
2536
;; V4DF fmsubadd wrapped in an unspec tagged UNSPEC_FMA4_INTRINSIC.  Mask 5
;; (binary 0101) takes elements 0,2 from (op1 * op2) + op3 and elements 1,3
;; from (op1 * op2) - op3.
;; NOTE(review): the condition passes 2 as the fifth argument although the
;; fma4i_fmadd/fmsub/fnmadd/fnmsub patterns pass 1 -- confirm.
2537 (define_insn "fma4i_fmsubaddv4df4"
2538 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2539 (unspec:V4DF
2540 [(vec_merge:V4DF
2541 (plus:V4DF
2542 (mult:V4DF
2543 (match_operand:V4DF 1 "nonimmediate_operand" "x,x")
2544 (match_operand:V4DF 2 "nonimmediate_operand" "x,xm"))
2545 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2546 (minus:V4DF
2547 (mult:V4DF
2548 (match_dup 1)
2549 (match_dup 2))
2550 (match_dup 3))
2551 (const_int 5))]
2552 UNSPEC_FMA4_INTRINSIC))]
2553 "TARGET_FMA4
2554 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2555 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2556 [(set_attr "type" "ssemuladd")
2557 (set_attr "mode" "V4DF")])
2558
;; FMA4 intrinsic pattern emitting vfmsubaddps on V4SF.
;; vec_merge of (op1 * op2 + op3) and (op1 * op2 - op3) under mask 5
;; (binary 0101): elements 0 and 2 come from the "plus" arm, elements
;; 1 and 3 from the "minus" arm.
;; Memory is allowed in operand 3 (first alternative) or operand 2
;; (second alternative); ix86_fma4_valid_op_p has the final say.
2559 (define_insn "fma4i_fmsubaddv4sf4"
2560 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2561 (unspec:V4SF
2562 [(vec_merge:V4SF
2563 (plus:V4SF
2564 (mult:V4SF
2565 (match_operand:V4SF 1 "nonimmediate_operand" "x,x")
2566 (match_operand:V4SF 2 "nonimmediate_operand" "x,xm"))
2567 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2568 (minus:V4SF
2569 (mult:V4SF
2570 (match_dup 1)
2571 (match_dup 2))
2572 (match_dup 3))
2573 (const_int 5))]
2574 UNSPEC_FMA4_INTRINSIC))]
2575 "TARGET_FMA4
2576 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2577 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2578 [(set_attr "type" "ssemuladd")
2579 (set_attr "mode" "V4SF")])
2580
;; FMA4 intrinsic pattern emitting vfmsubaddpd on V2DF.
;; vec_merge of (op1 * op2 + op3) and (op1 * op2 - op3) under mask 1:
;; element 0 comes from the "plus" arm, element 1 from the "minus" arm.
;; Memory is allowed in operand 3 (first alternative) or operand 2
;; (second alternative); ix86_fma4_valid_op_p has the final say.
2581 (define_insn "fma4i_fmsubaddv2df4"
2582 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2583 (unspec:V2DF
2584 [(vec_merge:V2DF
2585 (plus:V2DF
2586 (mult:V2DF
2587 (match_operand:V2DF 1 "nonimmediate_operand" "x,x")
2588 (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))
2589 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2590 (minus:V2DF
2591 (mult:V2DF
2592 (match_dup 1)
2593 (match_dup 2))
2594 (match_dup 3))
2595 (const_int 1))]
2596 UNSPEC_FMA4_INTRINSIC))]
2597 "TARGET_FMA4
2598 && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)"
2599 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2600 [(set_attr "type" "ssemuladd")
2601 (set_attr "mode" "V2DF")])
2602
2603 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2604 ;;
2605 ;; Parallel single-precision floating point conversion operations
2606 ;;
2607 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2608
;; cvtpi2ps: convert the two SImode elements of operand 2 (MMX register
;; or memory, constraint "ym") to SFmode and merge them into elements 0
;; and 1 of the result (vec_merge mask 3).  Elements 2 and 3 come from
;; operand 1, which is tied to the destination by the "0" constraint.
2609 (define_insn "sse_cvtpi2ps"
2610 [(set (match_operand:V4SF 0 "register_operand" "=x")
2611 (vec_merge:V4SF
2612 (vec_duplicate:V4SF
2613 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2614 (match_operand:V4SF 1 "register_operand" "0")
2615 (const_int 3)))]
2616 "TARGET_SSE"
2617 "cvtpi2ps\t{%2, %0|%0, %2}"
2618 [(set_attr "type" "ssecvt")
2619 (set_attr "mode" "V4SF")])
2620
;; cvtps2pi: SF -> SI conversion into an MMX register ("=y").
;; Modelled as converting all four lanes of operand 1 and selecting
;; elements 0 and 1 of the result.  The conversion is an
;; UNSPEC_FIX_NOTRUNC rather than the RTL "fix" code.
;; NOTE(review): presumably because the instruction rounds according to
;; the current rounding mode instead of truncating -- confirm.
2621 (define_insn "sse_cvtps2pi"
2622 [(set (match_operand:V2SI 0 "register_operand" "=y")
2623 (vec_select:V2SI
2624 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2625 UNSPEC_FIX_NOTRUNC)
2626 (parallel [(const_int 0) (const_int 1)])))]
2627 "TARGET_SSE"
2628 "cvtps2pi\t{%1, %0|%0, %1}"
2629 [(set_attr "type" "ssecvt")
2630 (set_attr "unit" "mmx")
2631 (set_attr "mode" "DI")])
2632
;; cvttps2pi: like sse_cvtps2pi but expressed with the RTL "fix" code
;; (the "cvtt" mnemonic form).  Elements 0 and 1 of the converted
;; vector are written to an MMX register ("=y").
2633 (define_insn "sse_cvttps2pi"
2634 [(set (match_operand:V2SI 0 "register_operand" "=y")
2635 (vec_select:V2SI
2636 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2637 (parallel [(const_int 0) (const_int 1)])))]
2638 "TARGET_SSE"
2639 "cvttps2pi\t{%1, %0|%0, %1}"
2640 [(set_attr "type" "ssecvt")
2641 (set_attr "unit" "mmx")
2642 (set_attr "prefix_rep" "0")
2643 (set_attr "mode" "SF")])
2644
;; AVX form of cvtsi2ss (three-operand vcvtsi2ss).
;; Operand 2 (SImode, register or memory) is converted to SFmode and
;; placed in element 0 (vec_merge mask 1); the remaining elements come
;; from operand 1, which is an independent source register here ("x")
;; rather than being tied to the destination.
2645 (define_insn "*avx_cvtsi2ss"
2646 [(set (match_operand:V4SF 0 "register_operand" "=x")
2647 (vec_merge:V4SF
2648 (vec_duplicate:V4SF
2649 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2650 (match_operand:V4SF 1 "register_operand" "x")
2651 (const_int 1)))]
2652 "TARGET_AVX"
2653 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2654 [(set_attr "type" "sseicvt")
2655 (set_attr "prefix" "vex")
2656 (set_attr "mode" "SF")])
2657
;; Legacy-SSE cvtsi2ss: convert SImode operand 2 to SFmode into element
;; 0 of the result (vec_merge mask 1); other elements are preserved
;; from operand 1, which is tied to the destination ("0").
;; The register and memory sources are separate alternatives so that
;; the athlon_decode / amdfam10_decode attributes can differ per
;; alternative.
2658 (define_insn "sse_cvtsi2ss"
2659 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2660 (vec_merge:V4SF
2661 (vec_duplicate:V4SF
2662 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2663 (match_operand:V4SF 1 "register_operand" "0,0")
2664 (const_int 1)))]
2665 "TARGET_SSE"
2666 "cvtsi2ss\t{%2, %0|%0, %2}"
2667 [(set_attr "type" "sseicvt")
2668 (set_attr "athlon_decode" "vector,double")
2669 (set_attr "amdfam10_decode" "vector,double")
2670 (set_attr "mode" "SF")])
2671
;; AVX form of the 64-bit-integer conversion (vcvtsi2ssq); only enabled
;; for TARGET_64BIT.  DImode operand 2 is converted to SFmode into
;; element 0 (vec_merge mask 1); other elements come from the separate
;; source register operand 1.
2672 (define_insn "*avx_cvtsi2ssq"
2673 [(set (match_operand:V4SF 0 "register_operand" "=x")
2674 (vec_merge:V4SF
2675 (vec_duplicate:V4SF
2676 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2677 (match_operand:V4SF 1 "register_operand" "x")
2678 (const_int 1)))]
2679 "TARGET_AVX && TARGET_64BIT"
2680 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2681 [(set_attr "type" "sseicvt")
2682 (set_attr "length_vex" "4")
2683 (set_attr "prefix" "vex")
2684 (set_attr "mode" "SF")])
2685
;; Legacy-SSE cvtsi2ssq (64-bit only): convert DImode operand 2 to
;; SFmode into element 0 of the result (vec_merge mask 1); other
;; elements are preserved from operand 1, tied to the destination.
;; NOTE(review): the second alternative of operand 2 is "rm" whereas
;; sse_cvtsi2ss uses "m", so a register source can match either
;; alternative and pick up either decode attribute value -- confirm
;; whether that is intended.
2686 (define_insn "sse_cvtsi2ssq"
2687 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2688 (vec_merge:V4SF
2689 (vec_duplicate:V4SF
2690 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2691 (match_operand:V4SF 1 "register_operand" "0,0")
2692 (const_int 1)))]
2693 "TARGET_SSE && TARGET_64BIT"
2694 "cvtsi2ssq\t{%2, %0|%0, %2}"
2695 [(set_attr "type" "sseicvt")
2696 (set_attr "prefix_rex" "1")
2697 (set_attr "athlon_decode" "vector,double")
2698 (set_attr "amdfam10_decode" "vector,double")
2699 (set_attr "mode" "SF")])
2700
;; cvtss2si: convert element 0 of V4SF operand 1 to an SImode general
;; register, expressed as UNSPEC_FIX_NOTRUNC.
;; The template begins with "%v" and the prefix attribute is
;; "maybe_vex"; NOTE(review): presumably one pattern covers both the
;; legacy and the VEX spelling -- the "%v" handling is outside this
;; chunk.
;; NOTE(review): unlike sse_cvtss2si_2, no amdfam10_decode attribute is
;; set here -- confirm whether that is intentional.
2701 (define_insn "sse_cvtss2si"
2702 [(set (match_operand:SI 0 "register_operand" "=r,r")
2703 (unspec:SI
2704 [(vec_select:SF
2705 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2706 (parallel [(const_int 0)]))]
2707 UNSPEC_FIX_NOTRUNC))]
2708 "TARGET_SSE"
2709 "%vcvtss2si\t{%1, %0|%0, %1}"
2710 [(set_attr "type" "sseicvt")
2711 (set_attr "athlon_decode" "double,vector")
2712 (set_attr "prefix_rep" "1")
2713 (set_attr "prefix" "maybe_vex")
2714 (set_attr "mode" "SI")])
2715
;; Variant of sse_cvtss2si whose source is a scalar SFmode operand
;; (SSE register or memory) instead of element 0 of a V4SF vector.
;; Emits the same cvtss2si instruction.
2716 (define_insn "sse_cvtss2si_2"
2717 [(set (match_operand:SI 0 "register_operand" "=r,r")
2718 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2719 UNSPEC_FIX_NOTRUNC))]
2720 "TARGET_SSE"
2721 "%vcvtss2si\t{%1, %0|%0, %1}"
2722 [(set_attr "type" "sseicvt")
2723 (set_attr "athlon_decode" "double,vector")
2724 (set_attr "amdfam10_decode" "double,double")
2725 (set_attr "prefix_rep" "1")
2726 (set_attr "prefix" "maybe_vex")
2727 (set_attr "mode" "SI")])
2728
;; cvtss2siq (64-bit only): convert element 0 of V4SF operand 1 to a
;; DImode general register, expressed as UNSPEC_FIX_NOTRUNC.
;; NOTE(review): unlike sse_cvtss2siq_2, no amdfam10_decode attribute
;; is set here -- confirm whether that is intentional.
2729 (define_insn "sse_cvtss2siq"
2730 [(set (match_operand:DI 0 "register_operand" "=r,r")
2731 (unspec:DI
2732 [(vec_select:SF
2733 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2734 (parallel [(const_int 0)]))]
2735 UNSPEC_FIX_NOTRUNC))]
2736 "TARGET_SSE && TARGET_64BIT"
2737 "%vcvtss2siq\t{%1, %0|%0, %1}"
2738 [(set_attr "type" "sseicvt")
2739 (set_attr "athlon_decode" "double,vector")
2740 (set_attr "prefix_rep" "1")
2741 (set_attr "prefix" "maybe_vex")
2742 (set_attr "mode" "DI")])
2743
;; Variant of sse_cvtss2siq whose source is a scalar SFmode operand
;; (SSE register or memory) instead of element 0 of a V4SF vector.
;; 64-bit only; emits the same cvtss2siq instruction.
2744 (define_insn "sse_cvtss2siq_2"
2745 [(set (match_operand:DI 0 "register_operand" "=r,r")
2746 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2747 UNSPEC_FIX_NOTRUNC))]
2748 "TARGET_SSE && TARGET_64BIT"
2749 "%vcvtss2siq\t{%1, %0|%0, %1}"
2750 [(set_attr "type" "sseicvt")
2751 (set_attr "athlon_decode" "double,vector")
2752 (set_attr "amdfam10_decode" "double,double")
2753 (set_attr "prefix_rep" "1")
2754 (set_attr "prefix" "maybe_vex")
2755 (set_attr "mode" "DI")])
2756
;; cvttss2si: convert element 0 of V4SF operand 1 to an SImode general
;; register using the RTL "fix" code (the "cvtt" mnemonic form).
;; Register and memory sources are separate alternatives so the decode
;; attributes can be given per alternative.
2757 (define_insn "sse_cvttss2si"
2758 [(set (match_operand:SI 0 "register_operand" "=r,r")
2759 (fix:SI
2760 (vec_select:SF
2761 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2762 (parallel [(const_int 0)]))))]
2763 "TARGET_SSE"
2764 "%vcvttss2si\t{%1, %0|%0, %1}"
2765 [(set_attr "type" "sseicvt")
2766 (set_attr "athlon_decode" "double,vector")
2767 (set_attr "amdfam10_decode" "double,double")
2768 (set_attr "prefix_rep" "1")
2769 (set_attr "prefix" "maybe_vex")
2770 (set_attr "mode" "SI")])
2771
;; cvttss2siq (64-bit only): convert element 0 of V4SF operand 1 to a
;; DImode general register using the RTL "fix" code.
2772 (define_insn "sse_cvttss2siq"
2773 [(set (match_operand:DI 0 "register_operand" "=r,r")
2774 (fix:DI
2775 (vec_select:SF
2776 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2777 (parallel [(const_int 0)]))))]
2778 "TARGET_SSE && TARGET_64BIT"
2779 "%vcvttss2siq\t{%1, %0|%0, %1}"
2780 [(set_attr "type" "sseicvt")
2781 (set_attr "athlon_decode" "double,vector")
2782 (set_attr "amdfam10_decode" "double,double")
2783 (set_attr "prefix_rep" "1")
2784 (set_attr "prefix" "maybe_vex")
2785 (set_attr "mode" "DI")])
2786
;; AVX vcvtdq2ps: whole-vector integer -> float conversion, instantiated
;; for every mode of the AVXMODEDCVTDQ2PS iterator.  The source mode is
;; given by the <avxcvtvecmode> mode attribute.  The iterator and the
;; attributes used here are defined outside this chunk.
2787 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2788 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2789 (float:AVXMODEDCVTDQ2PS
2790 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2791 "TARGET_AVX"
2792 "vcvtdq2ps\t{%1, %0|%0, %1}"
2793 [(set_attr "type" "ssecvt")
2794 (set_attr "prefix" "vex")
2795 (set_attr "mode" "<avxvecmode>")])
2796
;; cvtdq2ps: convert all four SImode elements of operand 1 (register or
;; memory) to SFmode.
2797 (define_insn "sse2_cvtdq2ps"
2798 [(set (match_operand:V4SF 0 "register_operand" "=x")
2799 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2800 "TARGET_SSE2"
2801 "cvtdq2ps\t{%1, %0|%0, %1}"
2802 [(set_attr "type" "ssecvt")
2803 (set_attr "mode" "V4SF")])
2804
;; Unsigned V4SI -> V4SF conversion synthesised from the signed one.
;; The template spells out the sequence:
;;   op5 = (float) op1     signed conversion
;;   op6 = op5 < op3       op3 is a zero vector
;;   op7 = op6 & op4       op4 is built from the constant 2**32
;;   op0 = op5 + op7
(define_expand "sse2_cvtudq2ps"
  [(set (match_dup 5)
        (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
   (set (match_dup 6)
        (lt:V4SF (match_dup 5) (match_dup 3)))
   (set (match_dup 7)
        (and:V4SF (match_dup 6) (match_dup 4)))
   (set (match_operand:V4SF 0 "register_operand" "")
        (plus:V4SF (match_dup 5) (match_dup 7)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE two_pow_32;
  rtx bias_elt;

  /* 1.0 scaled by 2**32, as an SFmode constant.  */
  real_ldexp (&two_pow_32, &dconst1, 32);
  bias_elt = const_double_from_real_value (two_pow_32, SFmode);

  /* Register copies of the zero vector and of the bias vector.
     NOTE(review): the second argument 1 of ix86_build_const_vector
     presumably requests a broadcast to every element -- confirm.  */
  operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
  operands[4] = force_reg (V4SFmode,
                           ix86_build_const_vector (SFmode, 1, bias_elt));

  /* Fresh pseudos for the three intermediate results.  */
  operands[5] = gen_reg_rtx (V4SFmode);
  operands[6] = gen_reg_rtx (V4SFmode);
  operands[7] = gen_reg_rtx (V4SFmode);
})
2829
;; AVX vcvtps2dq: whole-vector float -> integer conversion expressed as
;; UNSPEC_FIX_NOTRUNC, instantiated for every mode of the
;; AVXMODEDCVTPS2DQ iterator (defined outside this chunk).  The source
;; mode is given by the <avxcvtvecmode> mode attribute.
2830 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2831 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2832 (unspec:AVXMODEDCVTPS2DQ
2833 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2834 UNSPEC_FIX_NOTRUNC))]
2835 "TARGET_AVX"
2836 "vcvtps2dq\t{%1, %0|%0, %1}"
2837 [(set_attr "type" "ssecvt")
2838 (set_attr "prefix" "vex")
2839 (set_attr "mode" "<avxvecmode>")])
2840
;; cvtps2dq: convert all four SFmode elements of operand 1 to SImode,
;; expressed as UNSPEC_FIX_NOTRUNC.
2841 (define_insn "sse2_cvtps2dq"
2842 [(set (match_operand:V4SI 0 "register_operand" "=x")
2843 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2844 UNSPEC_FIX_NOTRUNC))]
2845 "TARGET_SSE2"
2846 "cvtps2dq\t{%1, %0|%0, %1}"
2847 [(set_attr "type" "ssecvt")
2848 (set_attr "prefix_data16" "1")
2849 (set_attr "mode" "TI")])
2850
;; AVX vcvttps2dq: whole-vector float -> integer conversion using the
;; RTL "fix" code, instantiated for every mode of the AVXMODEDCVTPS2DQ
;; iterator (defined outside this chunk).
2851 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2852 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2853 (fix:AVXMODEDCVTPS2DQ
2854 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2855 "TARGET_AVX"
2856 "vcvttps2dq\t{%1, %0|%0, %1}"
2857 [(set_attr "type" "ssecvt")
2858 (set_attr "prefix" "vex")
2859 (set_attr "mode" "<avxvecmode>")])
2860
;; cvttps2dq: convert all four SFmode elements of operand 1 to SImode
;; using the RTL "fix" code.  The prefix attributes are set explicitly
;; (prefix_rep 1, prefix_data16 0) rather than left to their defaults.
2861 (define_insn "sse2_cvttps2dq"
2862 [(set (match_operand:V4SI 0 "register_operand" "=x")
2863 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2864 "TARGET_SSE2"
2865 "cvttps2dq\t{%1, %0|%0, %1}"
2866 [(set_attr "type" "ssecvt")
2867 (set_attr "prefix_rep" "1")
2868 (set_attr "prefix_data16" "0")
2869 (set_attr "mode" "TI")])
2870
2871 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2872 ;;
2873 ;; Parallel double-precision floating point conversion operations
2874 ;;
2875 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2876
;; cvtpi2pd: convert two SImode elements (MMX register in the first
;; alternative, memory in the second) to V2DF.  The "unit" and
;; "prefix_data16" attributes are set only for the MMX-register
;; alternative; "*" keeps the default for the memory alternative.
2877 (define_insn "sse2_cvtpi2pd"
2878 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2879 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2880 "TARGET_SSE2"
2881 "cvtpi2pd\t{%1, %0|%0, %1}"
2882 [(set_attr "type" "ssecvt")
2883 (set_attr "unit" "mmx,*")
2884 (set_attr "prefix_data16" "1,*")
2885 (set_attr "mode" "V2DF")])
2886
;; cvtpd2pi: convert both DFmode elements of operand 1 to SImode in an
;; MMX register ("=y"), expressed as UNSPEC_FIX_NOTRUNC.
2887 (define_insn "sse2_cvtpd2pi"
2888 [(set (match_operand:V2SI 0 "register_operand" "=y")
2889 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2890 UNSPEC_FIX_NOTRUNC))]
2891 "TARGET_SSE2"
2892 "cvtpd2pi\t{%1, %0|%0, %1}"
2893 [(set_attr "type" "ssecvt")
2894 (set_attr "unit" "mmx")
2895 (set_attr "prefix_data16" "1")
2896 (set_attr "mode" "DI")])
2897
;; cvttpd2pi: convert both DFmode elements of operand 1 to SImode in an
;; MMX register ("=y") using the RTL "fix" code.
2898 (define_insn "sse2_cvttpd2pi"
2899 [(set (match_operand:V2SI 0 "register_operand" "=y")
2900 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2901 "TARGET_SSE2"
2902 "cvttpd2pi\t{%1, %0|%0, %1}"
2903 [(set_attr "type" "ssecvt")
2904 (set_attr "unit" "mmx")
2905 (set_attr "prefix_data16" "1")
2906 (set_attr "mode" "TI")])
2907
;; AVX form of cvtsi2sd (three-operand vcvtsi2sd).
;; SImode operand 2 is converted to DFmode and placed in element 0
;; (vec_merge mask 1); element 1 comes from operand 1, an independent
;; source register ("x").
2908 (define_insn "*avx_cvtsi2sd"
2909 [(set (match_operand:V2DF 0 "register_operand" "=x")
2910 (vec_merge:V2DF
2911 (vec_duplicate:V2DF
2912 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2913 (match_operand:V2DF 1 "register_operand" "x")
2914 (const_int 1)))]
2915 "TARGET_AVX"
2916 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2917 [(set_attr "type" "sseicvt")
2918 (set_attr "prefix" "vex")
2919 (set_attr "mode" "DF")])
2920
;; Legacy-SSE2 cvtsi2sd: convert SImode operand 2 to DFmode into
;; element 0 of the result (vec_merge mask 1); element 1 is preserved
;; from operand 1, which is tied to the destination ("0").
;; Register and memory sources are separate alternatives so the decode
;; attributes can be given per alternative.
2921 (define_insn "sse2_cvtsi2sd"
2922 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2923 (vec_merge:V2DF
2924 (vec_duplicate:V2DF
2925 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2926 (match_operand:V2DF 1 "register_operand" "0,0")
2927 (const_int 1)))]
2928 "TARGET_SSE2"
2929 "cvtsi2sd\t{%2, %0|%0, %2}"
2930 [(set_attr "type" "sseicvt")
2931 (set_attr "mode" "DF")
2932 (set_attr "athlon_decode" "double,direct")
2933 (set_attr "amdfam10_decode" "vector,double")])
2934
;; AVX form of the 64-bit-integer conversion (vcvtsi2sdq); only enabled
;; for TARGET_64BIT.  DImode operand 2 is converted to DFmode into
;; element 0 (vec_merge mask 1); element 1 comes from the separate
;; source register operand 1.
2935 (define_insn "*avx_cvtsi2sdq"
2936 [(set (match_operand:V2DF 0 "register_operand" "=x")
2937 (vec_merge:V2DF
2938 (vec_duplicate:V2DF
2939 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2940 (match_operand:V2DF 1 "register_operand" "x")
2941 (const_int 1)))]
2942 "TARGET_AVX && TARGET_64BIT"
2943 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2944 [(set_attr "type" "sseicvt")
2945 (set_attr "length_vex" "4")
2946 (set_attr "prefix" "vex")
2947 (set_attr "mode" "DF")])
2948
;; Legacy-SSE2 cvtsi2sdq (64-bit only): convert DImode operand 2 to
;; DFmode into element 0 of the result (vec_merge mask 1); element 1 is
;; preserved from operand 1, tied to the destination ("0").
2949 (define_insn "sse2_cvtsi2sdq"
2950 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2951 (vec_merge:V2DF
2952 (vec_duplicate:V2DF
2953 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2954 (match_operand:V2DF 1 "register_operand" "0,0")
2955 (const_int 1)))]
2956 "TARGET_SSE2 && TARGET_64BIT"
2957 "cvtsi2sdq\t{%2, %0|%0, %2}"
2958 [(set_attr "type" "sseicvt")
2959 (set_attr "prefix_rex" "1")
2960 (set_attr "mode" "DF")
2961 (set_attr "athlon_decode" "double,direct")
2962 (set_attr "amdfam10_decode" "vector,double")])
2963
;; cvtsd2si: convert element 0 of V2DF operand 1 to an SImode general
;; register, expressed as UNSPEC_FIX_NOTRUNC.
;; NOTE(review): unlike sse2_cvtsd2si_2, no amdfam10_decode attribute
;; is set here -- confirm whether that is intentional.
2964 (define_insn "sse2_cvtsd2si"
2965 [(set (match_operand:SI 0 "register_operand" "=r,r")
2966 (unspec:SI
2967 [(vec_select:DF
2968 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2969 (parallel [(const_int 0)]))]
2970 UNSPEC_FIX_NOTRUNC))]
2971 "TARGET_SSE2"
2972 "%vcvtsd2si\t{%1, %0|%0, %1}"
2973 [(set_attr "type" "sseicvt")
2974 (set_attr "athlon_decode" "double,vector")
2975 (set_attr "prefix_rep" "1")
2976 (set_attr "prefix" "maybe_vex")
2977 (set_attr "mode" "SI")])
2978
;; Variant of sse2_cvtsd2si whose source is a scalar DFmode operand
;; (SSE register or memory) instead of element 0 of a V2DF vector.
;; Emits the same cvtsd2si instruction.
2979 (define_insn "sse2_cvtsd2si_2"
2980 [(set (match_operand:SI 0 "register_operand" "=r,r")
2981 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2982 UNSPEC_FIX_NOTRUNC))]
2983 "TARGET_SSE2"
2984 "%vcvtsd2si\t{%1, %0|%0, %1}"
2985 [(set_attr "type" "sseicvt")
2986 (set_attr "athlon_decode" "double,vector")
2987 (set_attr "amdfam10_decode" "double,double")
2988 (set_attr "prefix_rep" "1")
2989 (set_attr "prefix" "maybe_vex")
2990 (set_attr "mode" "SI")])
2991
;; cvtsd2siq (64-bit only): convert element 0 of V2DF operand 1 to a
;; DImode general register, expressed as UNSPEC_FIX_NOTRUNC.
;; NOTE(review): unlike sse2_cvtsd2siq_2, no amdfam10_decode attribute
;; is set here -- confirm whether that is intentional.
2992 (define_insn "sse2_cvtsd2siq"
2993 [(set (match_operand:DI 0 "register_operand" "=r,r")
2994 (unspec:DI
2995 [(vec_select:DF
2996 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2997 (parallel [(const_int 0)]))]
2998 UNSPEC_FIX_NOTRUNC))]
2999 "TARGET_SSE2 && TARGET_64BIT"
3000 "%vcvtsd2siq\t{%1, %0|%0, %1}"
3001 [(set_attr "type" "sseicvt")
3002 (set_attr "athlon_decode" "double,vector")
3003 (set_attr "prefix_rep" "1")
3004 (set_attr "prefix" "maybe_vex")
3005 (set_attr "mode" "DI")])
3006
;; Variant of sse2_cvtsd2siq whose source is a scalar DFmode operand
;; (SSE register or memory) instead of element 0 of a V2DF vector.
;; 64-bit only; emits the same cvtsd2siq instruction.
3007 (define_insn "sse2_cvtsd2siq_2"
3008 [(set (match_operand:DI 0 "register_operand" "=r,r")
3009 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
3010 UNSPEC_FIX_NOTRUNC))]
3011 "TARGET_SSE2 && TARGET_64BIT"
3012 "%vcvtsd2siq\t{%1, %0|%0, %1}"
3013 [(set_attr "type" "sseicvt")
3014 (set_attr "athlon_decode" "double,vector")
3015 (set_attr "amdfam10_decode" "double,double")
3016 (set_attr "prefix_rep" "1")
3017 (set_attr "prefix" "maybe_vex")
3018 (set_attr "mode" "DI")])
3019
;; cvttsd2si: convert element 0 of V2DF operand 1 to an SImode general
;; register using the RTL "fix" code (the "cvtt" mnemonic form).
3020 (define_insn "sse2_cvttsd2si"
3021 [(set (match_operand:SI 0 "register_operand" "=r,r")
3022 (fix:SI
3023 (vec_select:DF
3024 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
3025 (parallel [(const_int 0)]))))]
3026 "TARGET_SSE2"
3027 "%vcvttsd2si\t{%1, %0|%0, %1}"
3028 [(set_attr "type" "sseicvt")
3029 (set_attr "prefix_rep" "1")
3030 (set_attr "prefix" "maybe_vex")
3031 (set_attr "mode" "SI")
3032 (set_attr "athlon_decode" "double,vector")
3033 (set_attr "amdfam10_decode" "double,double")])
3034
;; cvttsd2siq (64-bit only): convert element 0 of V2DF operand 1 to a
;; DImode general register using the RTL "fix" code.
3035 (define_insn "sse2_cvttsd2siq"
3036 [(set (match_operand:DI 0 "register_operand" "=r,r")
3037 (fix:DI
3038 (vec_select:DF
3039 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
3040 (parallel [(const_int 0)]))))]
3041 "TARGET_SSE2 && TARGET_64BIT"
3042 "%vcvttsd2siq\t{%1, %0|%0, %1}"
3043 [(set_attr "type" "sseicvt")
3044 (set_attr "prefix_rep" "1")
3045 (set_attr "prefix" "maybe_vex")
3046 (set_attr "mode" "DI")
3047 (set_attr "athlon_decode" "double,vector")
3048 (set_attr "amdfam10_decode" "double,double")])
3049
;; 256-bit vcvtdq2pd: convert all four SImode elements of the V4SI
;; operand 1 to DFmode, producing a V4DF result.
3050 (define_insn "avx_cvtdq2pd256"
3051 [(set (match_operand:V4DF 0 "register_operand" "=x")
3052 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
3053 "TARGET_AVX"
3054 "vcvtdq2pd\t{%1, %0|%0, %1}"
3055 [(set_attr "type" "ssecvt")
3056 (set_attr "prefix" "vex")
3057 (set_attr "mode" "V4DF")])
3058
;; cvtdq2pd: convert elements 0 and 1 of the V4SI operand 1 to DFmode,
;; producing a V2DF result.  Elements 2 and 3 of the source are not
;; read by the pattern.
3059 (define_insn "sse2_cvtdq2pd"
3060 [(set (match_operand:V2DF 0 "register_operand" "=x")
3061 (float:V2DF
3062 (vec_select:V2SI
3063 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3064 (parallel [(const_int 0) (const_int 1)]))))]
3065 "TARGET_SSE2"
3066 "%vcvtdq2pd\t{%1, %0|%0, %1}"
3067 [(set_attr "type" "ssecvt")
3068 (set_attr "prefix" "maybe_vex")
3069 (set_attr "mode" "V2DF")])
3070
;; 256-bit vcvtpd2dq: convert the four DFmode elements of operand 1 to
;; a V4SI result, expressed as UNSPEC_FIX_NOTRUNC.
;; NOTE(review): the "{y}" in the template is presumably an AT&T-only
;; mnemonic suffix that selects the 256-bit source form -- confirm.
3071 (define_insn "avx_cvtpd2dq256"
3072 [(set (match_operand:V4SI 0 "register_operand" "=x")
3073 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
3074 UNSPEC_FIX_NOTRUNC))]
3075 "TARGET_AVX"
3076 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
3077 [(set_attr "type" "ssecvt")
3078 (set_attr "prefix" "vex")
3079 (set_attr "mode" "OI")])
3080
;; Expander for cvtpd2dq.  The V4SI result is the concatenation of the
;; two converted elements (UNSPEC_FIX_NOTRUNC) with operand 2, which
;; the preparation statement sets to the V2SI zero constant so that the
;; generated RTL has the shape required by the "*sse2_cvtpd2dq" insn.
3081 (define_expand "sse2_cvtpd2dq"
3082 [(set (match_operand:V4SI 0 "register_operand" "")
3083 (vec_concat:V4SI
3084 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
3085 UNSPEC_FIX_NOTRUNC)
3086 (match_dup 2)))]
3087 "TARGET_SSE2"
3088 "operands[2] = CONST0_RTX (V2SImode);")
3089
;; cvtpd2dq: the two DFmode elements of operand 1 are converted
;; (UNSPEC_FIX_NOTRUNC) into the low half of the V4SI result; the high
;; half is operand 2, which must satisfy const0_operand.
;; The output is C code: it returns the "vcvtpd2dq{x}" spelling when
;; TARGET_AVX is set and the legacy "cvtpd2dq" spelling otherwise.
3090 (define_insn "*sse2_cvtpd2dq"
3091 [(set (match_operand:V4SI 0 "register_operand" "=x")
3092 (vec_concat:V4SI
3093 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
3094 UNSPEC_FIX_NOTRUNC)
3095 (match_operand:V2SI 2 "const0_operand" "")))]
3096 "TARGET_SSE2"
3097 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
3098 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
3099 [(set_attr "type" "ssecvt")
3100 (set_attr "prefix_rep" "1")
3101 (set_attr "prefix_data16" "0")
3102 (set_attr "prefix" "maybe_vex")
3103 (set_attr "mode" "TI")
3104 (set_attr "amdfam10_decode" "double")])
3105
;; 256-bit vcvttpd2dq: convert the four DFmode elements of operand 1 to
;; a V4SI result using the RTL "fix" code.
3106 (define_insn "avx_cvttpd2dq256"
3107 [(set (match_operand:V4SI 0 "register_operand" "=x")
3108 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3109 "TARGET_AVX"
3110 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
3111 [(set_attr "type" "ssecvt")
3112 (set_attr "prefix" "vex")
3113 (set_attr "mode" "OI")])
3114
;; Expander for cvttpd2dq.  The V4SI result is the concatenation of the
;; two "fix"-converted elements with operand 2, which the preparation
;; statement sets to the V2SI zero constant so that the generated RTL
;; has the shape required by the "*sse2_cvttpd2dq" insn.
3115 (define_expand "sse2_cvttpd2dq"
3116 [(set (match_operand:V4SI 0 "register_operand" "")
3117 (vec_concat:V4SI
3118 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
3119 (match_dup 2)))]
3120 "TARGET_SSE2"
3121 "operands[2] = CONST0_RTX (V2SImode);")
3122
;; cvttpd2dq: the two DFmode elements of operand 1 are converted with
;; the RTL "fix" code into the low half of the V4SI result; the high
;; half is operand 2, which must satisfy const0_operand.
;; The output is C code: it returns the "vcvttpd2dq{x}" spelling when
;; TARGET_AVX is set and the legacy "cvttpd2dq" spelling otherwise.
3123 (define_insn "*sse2_cvttpd2dq"
3124 [(set (match_operand:V4SI 0 "register_operand" "=x")
3125 (vec_concat:V4SI
3126 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3127 (match_operand:V2SI 2 "const0_operand" "")))]
3128 "TARGET_SSE2"
3129 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
3130 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
3131 [(set_attr "type" "ssecvt")
3132 (set_attr "prefix" "maybe_vex")
3133 (set_attr "mode" "TI")
3134 (set_attr "amdfam10_decode" "double")])
3135
;; AVX form of cvtsd2ss (three-operand vcvtsd2ss).
;; The pattern truncates V2DF operand 2 to V2SF, duplicates that into a
;; V4SF and merges only element 0 into the result (vec_merge mask 1);
;; the other elements come from operand 1, an independent source
;; register ("x").
3136 (define_insn "*avx_cvtsd2ss"
3137 [(set (match_operand:V4SF 0 "register_operand" "=x")
3138 (vec_merge:V4SF
3139 (vec_duplicate:V4SF
3140 (float_truncate:V2SF
3141 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
3142 (match_operand:V4SF 1 "register_operand" "x")
3143 (const_int 1)))]
3144 "TARGET_AVX"
3145 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
3146 [(set_attr "type" "ssecvt")
3147 (set_attr "prefix" "vex")
3148 (set_attr "mode" "SF")])
3149
;; Legacy-SSE2 cvtsd2ss: element 0 of the result is the DF -> SF
;; truncation taken from operand 2 (vec_merge mask 1); the other
;; elements are preserved from operand 1, tied to the destination.
;; Register and memory sources are separate alternatives so the decode
;; attributes can be given per alternative.
3150 (define_insn "sse2_cvtsd2ss"
3151 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3152 (vec_merge:V4SF
3153 (vec_duplicate:V4SF
3154 (float_truncate:V2SF
3155 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
3156 (match_operand:V4SF 1 "register_operand" "0,0")
3157 (const_int 1)))]
3158 "TARGET_SSE2"
3159 "cvtsd2ss\t{%2, %0|%0, %2}"
3160 [(set_attr "type" "ssecvt")
3161 (set_attr "athlon_decode" "vector,double")
3162 (set_attr "amdfam10_decode" "vector,double")
3163 (set_attr "mode" "SF")])
3164
;; AVX form of cvtss2sd (three-operand vcvtss2sd).
;; The pattern extends elements 0 and 1 of V4SF operand 2 to V2DF and
;; merges only element 0 into the result (vec_merge mask 1); element 1
;; comes from operand 1, an independent source register ("x").
3165 (define_insn "*avx_cvtss2sd"
3166 [(set (match_operand:V2DF 0 "register_operand" "=x")
3167 (vec_merge:V2DF
3168 (float_extend:V2DF
3169 (vec_select:V2SF
3170 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
3171 (parallel [(const_int 0) (const_int 1)])))
3172 (match_operand:V2DF 1 "register_operand" "x")
3173 (const_int 1)))]
3174 "TARGET_AVX"
3175 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
3176 [(set_attr "type" "ssecvt")
3177 (set_attr "prefix" "vex")
3178 (set_attr "mode" "DF")])
3179
;; Legacy-SSE2 cvtss2sd: element 0 of the result is the SF -> DF
;; extension taken from operand 2 (vec_merge mask 1); element 1 is
;; preserved from operand 1, tied to the destination ("0").
3180 (define_insn "sse2_cvtss2sd"
3181 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3182 (vec_merge:V2DF
3183 (float_extend:V2DF
3184 (vec_select:V2SF
3185 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
3186 (parallel [(const_int 0) (const_int 1)])))
3187 (match_operand:V2DF 1 "register_operand" "0,0")
3188 (const_int 1)))]
3189 "TARGET_SSE2"
3190 "cvtss2sd\t{%2, %0|%0, %2}"
3191 [(set_attr "type" "ssecvt")
3192 (set_attr "amdfam10_decode" "vector,double")
3193 (set_attr "mode" "DF")])
3194
;; 256-bit vcvtpd2ps: truncate the four DFmode elements of operand 1 to
;; SFmode, producing a V4SF result.
3195 (define_insn "avx_cvtpd2ps256"
3196 [(set (match_operand:V4SF 0 "register_operand" "=x")
3197 (float_truncate:V4SF
3198 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3199 "TARGET_AVX"
3200 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
3201 [(set_attr "type" "ssecvt")
3202 (set_attr "prefix" "vex")
3203 (set_attr "mode" "V4SF")])
3204
;; Expander for cvtpd2ps.  The V4SF result is the concatenation of the
;; two truncated elements with operand 2, which the preparation
;; statement sets to the V2SF zero constant so that the generated RTL
;; has the shape required by the "*sse2_cvtpd2ps" insn.
3205 (define_expand "sse2_cvtpd2ps"
3206 [(set (match_operand:V4SF 0 "register_operand" "")
3207 (vec_concat:V4SF
3208 (float_truncate:V2SF
3209 (match_operand:V2DF 1 "nonimmediate_operand" ""))
3210 (match_dup 2)))]
3211 "TARGET_SSE2"
3212 "operands[2] = CONST0_RTX (V2SFmode);")
3213
;; cvtpd2ps: the two DFmode elements of operand 1 are truncated into
;; the low half of the V4SF result; the high half is operand 2, which
;; must satisfy const0_operand.
;; The output is C code: it returns the "vcvtpd2ps{x}" spelling when
;; TARGET_AVX is set and the legacy "cvtpd2ps" spelling otherwise.
3214 (define_insn "*sse2_cvtpd2ps"
3215 [(set (match_operand:V4SF 0 "register_operand" "=x")
3216 (vec_concat:V4SF
3217 (float_truncate:V2SF
3218 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3219 (match_operand:V2SF 2 "const0_operand" "")))]
3220 "TARGET_SSE2"
3221 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
3222 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
3223 [(set_attr "type" "ssecvt")
3224 (set_attr "prefix_data16" "1")
3225 (set_attr "prefix" "maybe_vex")
3226 (set_attr "mode" "V4SF")
3227 (set_attr "amdfam10_decode" "double")])
3228
;; 256-bit vcvtps2pd: extend the four SFmode elements of operand 1 to
;; DFmode, producing a V4DF result.
3229 (define_insn "avx_cvtps2pd256"
3230 [(set (match_operand:V4DF 0 "register_operand" "=x")
3231 (float_extend:V4DF
3232 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3233 "TARGET_AVX"
3234 "vcvtps2pd\t{%1, %0|%0, %1}"
3235 [(set_attr "type" "ssecvt")
3236 (set_attr "prefix" "vex")
3237 (set_attr "mode" "V4DF")])
3238
;; cvtps2pd: extend elements 0 and 1 of the V4SF operand 1 to DFmode,
;; producing a V2DF result.  Elements 2 and 3 of the source are not
;; read by the pattern.
3239 (define_insn "sse2_cvtps2pd"
3240 [(set (match_operand:V2DF 0 "register_operand" "=x")
3241 (float_extend:V2DF
3242 (vec_select:V2SF
3243 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3244 (parallel [(const_int 0) (const_int 1)]))))]
3245 "TARGET_SSE2"
3246 "%vcvtps2pd\t{%1, %0|%0, %1}"
3247 [(set_attr "type" "ssecvt")
3248 (set_attr "prefix" "maybe_vex")
3249 (set_attr "mode" "V2DF")
3250 (set_attr "prefix_data16" "0")
3251 (set_attr "amdfam10_decode" "direct")])
3252
;; Widen the high two SFmode elements of operand 1 to V2DF.
;; Step 1 moves elements 2 and 3 of operand 1 into elements 0 and 1 of
;; a fresh V4SF scratch (operand 2): indices 6 and 7 of the vec_concat
;; of scratch and operand 1 are operand 1's elements 2 and 3.  This is
;; the same selection shape as the movhlps patterns in this file.
;; Elements 2 and 3 of the scratch are copied from the scratch's own
;; previous contents, which step 2 never reads.
;; Step 2 extends elements 0 and 1 of the scratch to DFmode.
3253 (define_expand "vec_unpacks_hi_v4sf"
3254 [(set (match_dup 2)
3255 (vec_select:V4SF
3256 (vec_concat:V8SF
3257 (match_dup 2)
3258 (match_operand:V4SF 1 "nonimmediate_operand" ""))
3259 (parallel [(const_int 6)
3260 (const_int 7)
3261 (const_int 2)
3262 (const_int 3)])))
3263 (set (match_operand:V2DF 0 "register_operand" "")
3264 (float_extend:V2DF
3265 (vec_select:V2SF
3266 (match_dup 2)
3267 (parallel [(const_int 0) (const_int 1)]))))]
3268 "TARGET_SSE2"
3269 {
3270 operands[2] = gen_reg_rtx (V4SFmode);
3271 })
3272
;; Widen the low two SFmode elements of operand 1 to V2DF.  No
;; preparation code is needed: the template has the same RTL shape as
;; the sse2_cvtps2pd insn.
3273 (define_expand "vec_unpacks_lo_v4sf"
3274 [(set (match_operand:V2DF 0 "register_operand" "")
3275 (float_extend:V2DF
3276 (vec_select:V2SF
3277 (match_operand:V4SF 1 "nonimmediate_operand" "")
3278 (parallel [(const_int 0) (const_int 1)]))))]
3279 "TARGET_SSE2")
3280
;; Signed V8HI (high half) -> V4SF: widen the high four HImode elements
;; to SImode with vec_unpacks_hi_v8hi, then convert with cvtdq2ps.
(define_expand "vec_unpacks_float_hi_v8hi"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  /* Holds the widened integers between the two steps.  */
  rtx widened = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_unpacks_hi_v8hi (widened, operands[1]));
  emit_insn (gen_sse2_cvtdq2ps (operands[0], widened));
  DONE;
})
3292
;; Signed V8HI (low half) -> V4SF: widen the low four HImode elements
;; to SImode with vec_unpacks_lo_v8hi, then convert with cvtdq2ps.
(define_expand "vec_unpacks_float_lo_v8hi"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  /* Holds the widened integers between the two steps.  */
  rtx widened = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_unpacks_lo_v8hi (widened, operands[1]));
  emit_insn (gen_sse2_cvtdq2ps (operands[0], widened));
  DONE;
})
3304
;; Unsigned V8HI (high half) -> V4SF: widen the high four HImode
;; elements to SImode with vec_unpacku_hi_v8hi, then convert with
;; cvtdq2ps.
(define_expand "vec_unpacku_float_hi_v8hi"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  /* Holds the widened integers between the two steps.  */
  rtx widened = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_unpacku_hi_v8hi (widened, operands[1]));
  emit_insn (gen_sse2_cvtdq2ps (operands[0], widened));
  DONE;
})
3316
;; Unsigned V8HI (low half) -> V4SF: widen the low four HImode elements
;; to SImode with vec_unpacku_lo_v8hi, then convert with cvtdq2ps.
(define_expand "vec_unpacku_float_lo_v8hi"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  /* Holds the widened integers between the two steps.  */
  rtx widened = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_unpacku_lo_v8hi (widened, operands[1]));
  emit_insn (gen_sse2_cvtdq2ps (operands[0], widened));
  DONE;
})
3328
;; Signed V4SI (high half) -> V2DF.
;; Step 1 copies elements 2 and 3 of operand 1 into both halves of a
;; fresh V4SI scratch (selection 2, 3, 2, 3).
;; Step 2 converts elements 0 and 1 of the scratch to DFmode; that set
;; has the same RTL shape as the sse2_cvtdq2pd insn.
3329 (define_expand "vec_unpacks_float_hi_v4si"
3330 [(set (match_dup 2)
3331 (vec_select:V4SI
3332 (match_operand:V4SI 1 "nonimmediate_operand" "")
3333 (parallel [(const_int 2)
3334 (const_int 3)
3335 (const_int 2)
3336 (const_int 3)])))
3337 (set (match_operand:V2DF 0 "register_operand" "")
3338 (float:V2DF
3339 (vec_select:V2SI
3340 (match_dup 2)
3341 (parallel [(const_int 0) (const_int 1)]))))]
3342 "TARGET_SSE2"
3343 "operands[2] = gen_reg_rtx (V4SImode);")
3344
;; Signed V4SI (low half) -> V2DF.  No preparation code is needed: the
;; template has the same RTL shape as the sse2_cvtdq2pd insn.
3345 (define_expand "vec_unpacks_float_lo_v4si"
3346 [(set (match_operand:V2DF 0 "register_operand" "")
3347 (float:V2DF
3348 (vec_select:V2SI
3349 (match_operand:V4SI 1 "nonimmediate_operand" "")
3350 (parallel [(const_int 0) (const_int 1)]))))]
3351 "TARGET_SSE2")
3352
;; Unsigned V4SI (high half) -> V2DF, synthesised from the signed
;; conversion.  The template spells out the sequence:
;;   op5 = op1 with elements 2, 3 copied to both halves
;;   op6 = (double) op5[0..1]   signed conversion
;;   op7 = op6 < op3            op3 is a zero vector
;;   op8 = op7 & op4            op4 is built from the constant 2**32
;;   op0 = op6 + op8
(define_expand "vec_unpacku_float_hi_v4si"
  [(set (match_dup 5)
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "")
          (parallel [(const_int 2)
                     (const_int 3)
                     (const_int 2)
                     (const_int 3)])))
   (set (match_dup 6)
        (float:V2DF
          (vec_select:V2SI
            (match_dup 5)
            (parallel [(const_int 0) (const_int 1)]))))
   (set (match_dup 7)
        (lt:V2DF (match_dup 6) (match_dup 3)))
   (set (match_dup 8)
        (and:V2DF (match_dup 7) (match_dup 4)))
   (set (match_operand:V2DF 0 "register_operand" "")
        (plus:V2DF (match_dup 6) (match_dup 8)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE two_pow_32;
  rtx bias_elt;

  /* 1.0 scaled by 2**32, as a DFmode constant.  */
  real_ldexp (&two_pow_32, &dconst1, 32);
  bias_elt = const_double_from_real_value (two_pow_32, DFmode);

  /* Register copies of the zero vector and of the bias vector.
     NOTE(review): the second argument 1 of ix86_build_const_vector
     presumably requests a broadcast to every element -- confirm.  */
  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  operands[4] = force_reg (V2DFmode,
                           ix86_build_const_vector (DFmode, 1, bias_elt));

  /* Integer scratch for the shuffled input.  */
  operands[5] = gen_reg_rtx (V4SImode);

  /* Fresh pseudos for the three floating-point intermediates.  */
  operands[6] = gen_reg_rtx (V2DFmode);
  operands[7] = gen_reg_rtx (V2DFmode);
  operands[8] = gen_reg_rtx (V2DFmode);
})
3389
;; Unsigned V4SI (low half) -> V2DF, synthesised from the signed
;; conversion.  The template spells out the sequence:
;;   op5 = (double) op1[0..1]   signed conversion
;;   op6 = op5 < op3            op3 is a zero vector
;;   op7 = op6 & op4            op4 is built from the constant 2**32
;;   op0 = op5 + op7
(define_expand "vec_unpacku_float_lo_v4si"
  [(set (match_dup 5)
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1)]))))
   (set (match_dup 6)
        (lt:V2DF (match_dup 5) (match_dup 3)))
   (set (match_dup 7)
        (and:V2DF (match_dup 6) (match_dup 4)))
   (set (match_operand:V2DF 0 "register_operand" "")
        (plus:V2DF (match_dup 5) (match_dup 7)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE two_pow_32;
  rtx bias_elt;

  /* 1.0 scaled by 2**32, as a DFmode constant.  */
  real_ldexp (&two_pow_32, &dconst1, 32);
  bias_elt = const_double_from_real_value (two_pow_32, DFmode);

  /* Register copies of the zero vector and of the bias vector.
     NOTE(review): the second argument 1 of ix86_build_const_vector
     presumably requests a broadcast to every element -- confirm.  */
  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  operands[4] = force_reg (V2DFmode,
                           ix86_build_const_vector (DFmode, 1, bias_elt));

  /* Fresh pseudos for the three intermediate results.  */
  operands[5] = gen_reg_rtx (V2DFmode);
  operands[6] = gen_reg_rtx (V2DFmode);
  operands[7] = gen_reg_rtx (V2DFmode);
})
3417
;; Pack two V2DF vectors into one V4SF: each input is narrowed with
;; cvtpd2ps into its own V4SF temporary, and the two temporaries are
;; then combined with movlhps.
(define_expand "vec_pack_trunc_v2df"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  rtx first_ps = gen_reg_rtx (V4SFmode);
  rtx second_ps = gen_reg_rtx (V4SFmode);

  /* Narrow each input separately.  */
  emit_insn (gen_sse2_cvtpd2ps (first_ps, operands[1]));
  emit_insn (gen_sse2_cvtpd2ps (second_ps, operands[2]));

  /* Join the two narrowed halves into the destination.  */
  emit_insn (gen_sse_movlhps (operands[0], first_ps, second_ps));
  DONE;
})
3434
;; Pack two V2DF vectors into one V4SI: each input is converted with
;; cvttpd2dq into its own V4SI temporary, and the two temporaries are
;; then combined with punpcklqdq on their V2DI views.
(define_expand "vec_pack_sfix_trunc_v2df"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  rtx first_dq = gen_reg_rtx (V4SImode);
  rtx second_dq = gen_reg_rtx (V4SImode);

  /* Convert each input separately.  */
  emit_insn (gen_sse2_cvttpd2dq (first_dq, operands[1]));
  emit_insn (gen_sse2_cvttpd2dq (second_dq, operands[2]));

  /* Interleave the low quadwords; all three operands are viewed as
     V2DI for that purpose.  */
  emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
                                  gen_lowpart (V2DImode, first_dq),
                                  gen_lowpart (V2DImode, second_dq)));
  DONE;
})
3453
;; Pack two V2DF vectors into one V4SI: each input is converted with
;; cvtpd2dq into its own V4SI temporary, and the two temporaries are
;; then combined with punpcklqdq on their V2DI views.
(define_expand "vec_pack_sfix_v2df"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  rtx first_dq = gen_reg_rtx (V4SImode);
  rtx second_dq = gen_reg_rtx (V4SImode);

  /* Convert each input separately.  */
  emit_insn (gen_sse2_cvtpd2dq (first_dq, operands[1]));
  emit_insn (gen_sse2_cvtpd2dq (second_dq, operands[2]));

  /* Interleave the low quadwords; all three operands are viewed as
     V2DI for that purpose.  */
  emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
                                  gen_lowpart (V2DImode, first_dq),
                                  gen_lowpart (V2DImode, second_dq)));
  DONE;
})
3472
3473 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3474 ;;
3475 ;; Parallel single-precision floating point element swizzling
3476 ;;
3477 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3478
;; Expander for the movhlps shuffle.  Selecting indices 6, 7, 2, 3 from
;; the concatenation of operands 1 and 2 gives:
;;   result[0..1] = operand 2 elements 2..3
;;   result[2..3] = operand 1 elements 2..3
;; The preparation statement hands the operands to
;; ix86_fixup_binary_operands before the pattern is emitted.
;; NOTE(review): presumably this legitimises memory/register operand
;; combinations -- the helper is defined outside this file; confirm.
3479 (define_expand "sse_movhlps_exp"
3480 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3481 (vec_select:V4SF
3482 (vec_concat:V8SF
3483 (match_operand:V4SF 1 "nonimmediate_operand" "")
3484 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3485 (parallel [(const_int 6)
3486 (const_int 7)
3487 (const_int 2)
3488 (const_int 3)])))]
3489 "TARGET_SSE"
3490 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
3491
;; AVX (prefix "vex") form of the movhlps shuffle: elements 6,7,2,3 of
;; {operand 1, operand 2}.  Three alternatives:
;;   0: all registers                    -> vmovhlps
;;   1: operand 2 in offsettable memory  -> vmovlps reading %H2
;;   2: memory destination, operand 1 tied to it -> vmovhps store
;; The insn condition rejects operands 1 and 2 both being memory.
3492 (define_insn "*avx_movhlps"
3493 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3494 (vec_select:V4SF
3495 (vec_concat:V8SF
3496 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3497 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3498 (parallel [(const_int 6)
3499 (const_int 7)
3500 (const_int 2)
3501 (const_int 3)])))]
3502 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3503 "@
3504 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3505 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3506 vmovhps\t{%2, %0|%0, %2}"
3507 [(set_attr "type" "ssemov")
3508 (set_attr "prefix" "vex")
3509 (set_attr "mode" "V4SF,V2SF,V2SF")])
3510
;; Two-operand SSE form of the movhlps shuffle: elements 6,7,2,3 of
;; {operand 1, operand 2}.  Operand 1 is tied to the destination in every
;; alternative.  Alternatives: register source -> movhlps; offsettable
;; memory source -> movlps reading %H2; memory destination -> movhps store.
;; The insn condition rejects operands 1 and 2 both being memory.
3511 (define_insn "sse_movhlps"
3512 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3513 (vec_select:V4SF
3514 (vec_concat:V8SF
3515 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3516 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3517 (parallel [(const_int 6)
3518 (const_int 7)
3519 (const_int 2)
3520 (const_int 3)])))]
3521 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3522 "@
3523 movhlps\t{%2, %0|%0, %2}
3524 movlps\t{%H2, %0|%0, %H2}
3525 movhps\t{%2, %0|%0, %2}"
3526 [(set_attr "type" "ssemov")
3527 (set_attr "mode" "V4SF,V2SF,V2SF")])
3528
;; Expander for the movlhps shuffle.  The result is elements 0,1,4,5 of
;; {operand 1, operand 2}, i.e. elements 0,1 of operand 1 followed by
;; elements 0,1 of operand 2.  The preparation statement runs
;; ix86_fixup_binary_operands on the operands first.
3529 (define_expand "sse_movlhps_exp"
3530 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3531 (vec_select:V4SF
3532 (vec_concat:V8SF
3533 (match_operand:V4SF 1 "nonimmediate_operand" "")
3534 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3535 (parallel [(const_int 0)
3536 (const_int 1)
3537 (const_int 4)
3538 (const_int 5)])))]
3539 "TARGET_SSE"
3540 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
3541
;; AVX (prefix "vex") form of the movlhps shuffle: elements 0,1,4,5 of
;; {operand 1, operand 2}.  Three alternatives:
;;   0: all registers                 -> vmovlhps
;;   1: operand 2 in memory           -> vmovhps load
;;   2: offsettable memory destination, operand 1 tied to it
;;                                    -> vmovlps store to %H0
;; Operand validity is delegated to ix86_binary_operator_ok.
3542 (define_insn "*avx_movlhps"
3543 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3544 (vec_select:V4SF
3545 (vec_concat:V8SF
3546 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3547 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3548 (parallel [(const_int 0)
3549 (const_int 1)
3550 (const_int 4)
3551 (const_int 5)])))]
3552 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3553 "@
3554 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3555 vmovhps\t{%2, %1, %0|%0, %1, %2}
3556 vmovlps\t{%2, %H0|%H0, %2}"
3557 [(set_attr "type" "ssemov")
3558 (set_attr "prefix" "vex")
3559 (set_attr "mode" "V4SF,V2SF,V2SF")])
3560
;; Two-operand SSE form of the movlhps shuffle: elements 0,1,4,5 of
;; {operand 1, operand 2}.  Operand 1 is tied to the destination in every
;; alternative.  Alternatives: register source -> movlhps; memory source
;; -> movhps load; offsettable memory destination -> movlps store to %H0.
;; Operand validity is delegated to ix86_binary_operator_ok.
3561 (define_insn "sse_movlhps"
3562 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3563 (vec_select:V4SF
3564 (vec_concat:V8SF
3565 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3566 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3567 (parallel [(const_int 0)
3568 (const_int 1)
3569 (const_int 4)
3570 (const_int 5)])))]
3571 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3572 "@
3573 movlhps\t{%2, %0|%0, %2}
3574 movhps\t{%2, %0|%0, %2}
3575 movlps\t{%2, %H0|%H0, %2}"
3576 [(set_attr "type" "ssemov")
3577 (set_attr "mode" "V4SF,V2SF,V2SF")])
3578
;; 256-bit vunpckhps.  From the 16-element concatenation {operand 1,
;; operand 2} (operand 2 occupies indices 8..15) the result interleaves
;; elements 2,3 and 6,7 of operand 1 with the same-numbered elements of
;; operand 2: 2,10,3,11,6,14,7,15.
3579 (define_insn "avx_unpckhps256"
3580 [(set (match_operand:V8SF 0 "register_operand" "=x")
3581 (vec_select:V8SF
3582 (vec_concat:V16SF
3583 (match_operand:V8SF 1 "register_operand" "x")
3584 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3585 (parallel [(const_int 2) (const_int 10)
3586 (const_int 3) (const_int 11)
3587 (const_int 6) (const_int 14)
3588 (const_int 7) (const_int 15)])))]
3589 "TARGET_AVX"
3590 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3591 [(set_attr "type" "sselog")
3592 (set_attr "prefix" "vex")
3593 (set_attr "mode" "V8SF")])
3594
;; 128-bit three-operand vunpckhps: interleaves elements 2,3 of operand 1
;; with elements 2,3 of operand 2 (indices 2,6,3,7 of the concatenation).
3595 (define_insn "*avx_unpckhps"
3596 [(set (match_operand:V4SF 0 "register_operand" "=x")
3597 (vec_select:V4SF
3598 (vec_concat:V8SF
3599 (match_operand:V4SF 1 "register_operand" "x")
3600 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3601 (parallel [(const_int 2) (const_int 6)
3602 (const_int 3) (const_int 7)])))]
3603 "TARGET_AVX"
3604 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3605 [(set_attr "type" "sselog")
3606 (set_attr "prefix" "vex")
3607 (set_attr "mode" "V4SF")])
3608
;; Two-operand SSE unpckhps: interleaves elements 2,3 of operand 1 with
;; elements 2,3 of operand 2 (indices 2,6,3,7 of the concatenation).
;; Operand 1 is tied to the destination register.
3609 (define_insn "sse_unpckhps"
3610 [(set (match_operand:V4SF 0 "register_operand" "=x")
3611 (vec_select:V4SF
3612 (vec_concat:V8SF
3613 (match_operand:V4SF 1 "register_operand" "0")
3614 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3615 (parallel [(const_int 2) (const_int 6)
3616 (const_int 3) (const_int 7)])))]
3617 "TARGET_SSE"
3618 "unpckhps\t{%2, %0|%0, %2}"
3619 [(set_attr "type" "sselog")
3620 (set_attr "mode" "V4SF")])
3621
;; 256-bit vunpcklps.  From the 16-element concatenation {operand 1,
;; operand 2} (operand 2 occupies indices 8..15) the result interleaves
;; elements 0,1 and 4,5 of operand 1 with the same-numbered elements of
;; operand 2: 0,8,1,9,4,12,5,13.
3622 (define_insn "avx_unpcklps256"
3623 [(set (match_operand:V8SF 0 "register_operand" "=x")
3624 (vec_select:V8SF
3625 (vec_concat:V16SF
3626 (match_operand:V8SF 1 "register_operand" "x")
3627 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3628 (parallel [(const_int 0) (const_int 8)
3629 (const_int 1) (const_int 9)
3630 (const_int 4) (const_int 12)
3631 (const_int 5) (const_int 13)])))]
3632 "TARGET_AVX"
3633 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3634 [(set_attr "type" "sselog")
3635 (set_attr "prefix" "vex")
3636 (set_attr "mode" "V8SF")])
3637
;; 128-bit three-operand vunpcklps: interleaves elements 0,1 of operand 1
;; with elements 0,1 of operand 2 (indices 0,4,1,5 of the concatenation).
3638 (define_insn "*avx_unpcklps"
3639 [(set (match_operand:V4SF 0 "register_operand" "=x")
3640 (vec_select:V4SF
3641 (vec_concat:V8SF
3642 (match_operand:V4SF 1 "register_operand" "x")
3643 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3644 (parallel [(const_int 0) (const_int 4)
3645 (const_int 1) (const_int 5)])))]
3646 "TARGET_AVX"
3647 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3648 [(set_attr "type" "sselog")
3649 (set_attr "prefix" "vex")
3650 (set_attr "mode" "V4SF")])
3651
;; Two-operand SSE unpcklps: interleaves elements 0,1 of operand 1 with
;; elements 0,1 of operand 2 (indices 0,4,1,5 of the concatenation).
;; Operand 1 is tied to the destination register.
3652 (define_insn "sse_unpcklps"
3653 [(set (match_operand:V4SF 0 "register_operand" "=x")
3654 (vec_select:V4SF
3655 (vec_concat:V8SF
3656 (match_operand:V4SF 1 "register_operand" "0")
3657 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3658 (parallel [(const_int 0) (const_int 4)
3659 (const_int 1) (const_int 5)])))]
3660 "TARGET_SSE"
3661 "unpcklps\t{%2, %0|%0, %2}"
3662 [(set_attr "type" "sselog")
3663 (set_attr "mode" "V4SF")])
3664
3665 ;; These are modeled with the same vec_concat as the others so that we
3666 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup.  Operand 1 is concatenated with itself (match_dup)
;; and the odd-numbered elements 1,3,5,7 are each selected twice, giving
;; 1,1,3,3,5,5,7,7.  All selected indices fall in the first copy.
3667 (define_insn "avx_movshdup256"
3668 [(set (match_operand:V8SF 0 "register_operand" "=x")
3669 (vec_select:V8SF
3670 (vec_concat:V16SF
3671 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3672 (match_dup 1))
3673 (parallel [(const_int 1) (const_int 1)
3674 (const_int 3) (const_int 3)
3675 (const_int 5) (const_int 5)
3676 (const_int 7) (const_int 7)])))]
3677 "TARGET_AVX"
3678 "vmovshdup\t{%1, %0|%0, %1}"
3679 [(set_attr "type" "sse")
3680 (set_attr "prefix" "vex")
3681 (set_attr "mode" "V8SF")])
3682
;; 128-bit movshdup.  Operand 1 is concatenated with itself; indices
;; 1,1,7,7 of that 8-element vector are elements 1,1,3,3 of operand 1
;; (index 7 is element 3 of the second copy).  This is the same index
;; layout the shufps patterns use: two indices in 0..3, two in 4..7.
3683 (define_insn "sse3_movshdup"
3684 [(set (match_operand:V4SF 0 "register_operand" "=x")
3685 (vec_select:V4SF
3686 (vec_concat:V8SF
3687 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3688 (match_dup 1))
3689 (parallel [(const_int 1)
3690 (const_int 1)
3691 (const_int 7)
3692 (const_int 7)])))]
3693 "TARGET_SSE3"
3694 "%vmovshdup\t{%1, %0|%0, %1}"
3695 [(set_attr "type" "sse")
3696 (set_attr "prefix_rep" "1")
3697 (set_attr "prefix" "maybe_vex")
3698 (set_attr "mode" "V4SF")])
3699
;; 256-bit vmovsldup.  Operand 1 is concatenated with itself (match_dup)
;; and the even-numbered elements 0,2,4,6 are each selected twice, giving
;; 0,0,2,2,4,4,6,6.  All selected indices fall in the first copy.
3700 (define_insn "avx_movsldup256"
3701 [(set (match_operand:V8SF 0 "register_operand" "=x")
3702 (vec_select:V8SF
3703 (vec_concat:V16SF
3704 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3705 (match_dup 1))
3706 (parallel [(const_int 0) (const_int 0)
3707 (const_int 2) (const_int 2)
3708 (const_int 4) (const_int 4)
3709 (const_int 6) (const_int 6)])))]
3710 "TARGET_AVX"
3711 "vmovsldup\t{%1, %0|%0, %1}"
3712 [(set_attr "type" "sse")
3713 (set_attr "prefix" "vex")
3714 (set_attr "mode" "V8SF")])
3715
;; 128-bit movsldup.  Operand 1 is concatenated with itself; indices
;; 0,0,6,6 of that 8-element vector are elements 0,0,2,2 of operand 1
;; (index 6 is element 2 of the second copy).  This is the same index
;; layout the shufps patterns use: two indices in 0..3, two in 4..7.
3716 (define_insn "sse3_movsldup"
3717 [(set (match_operand:V4SF 0 "register_operand" "=x")
3718 (vec_select:V4SF
3719 (vec_concat:V8SF
3720 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3721 (match_dup 1))
3722 (parallel [(const_int 0)
3723 (const_int 0)
3724 (const_int 6)
3725 (const_int 6)])))]
3726 "TARGET_SSE3"
3727 "%vmovsldup\t{%1, %0|%0, %1}"
3728 [(set_attr "type" "sse")
3729 (set_attr "prefix_rep" "1")
3730 (set_attr "prefix" "maybe_vex")
3731 (set_attr "mode" "V4SF")])
3732
;; Expand a 256-bit shufps given its 8-bit immediate (operand 3).  The
;; immediate is split into four 2-bit selectors, and each selector is
;; turned into a pair of explicit element indices for avx_shufps256_1:
;; the same selector is used once as-is (plus the operand base) and once
;; offset by a further 4.
;;   selectors 0,1 index {operand 1} : base 0 and base 4
;;   selectors 2,3 index {operand 2} : base 8 and base 12
(define_expand "avx_shufps256"
  [(match_operand:V8SF 0 "register_operand" "")
   (match_operand:V8SF 1 "register_operand" "")
   (match_operand:V8SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_AVX"
{
  int imm = INTVAL (operands[3]);
  /* The four 2-bit fields of the immediate, lowest field first.  */
  int sel0 = (imm >> 0) & 3;
  int sel1 = (imm >> 2) & 3;
  int sel2 = (imm >> 4) & 3;
  int sel3 = (imm >> 6) & 3;

  emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
                                  GEN_INT (sel0),
                                  GEN_INT (sel1),
                                  GEN_INT (sel2 + 8),
                                  GEN_INT (sel3 + 8),
                                  GEN_INT (sel0 + 4),
                                  GEN_INT (sel1 + 4),
                                  GEN_INT (sel2 + 12),
                                  GEN_INT (sel3 + 12)));
  DONE;
})
3752
;; 256-bit vshufps with the selection spelled out as eight element
;; indices into {operand 1, operand 2}.  Each 2-bit field of the final
;; immediate drives two result elements: operands 3..6 give the indices
;; for result elements 0..3 and operands 7..10 must be exactly those
;; indices plus 4 (enforced by the insn condition).  The output code
;; therefore rebuilds the immediate from operands 3..6 only.
(define_insn "avx_shufps256_1"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_8_to_11_operand" "")
                     (match_operand 6 "const_8_to_11_operand" "")
                     (match_operand 7 "const_4_to_7_operand" "")
                     (match_operand 8 "const_4_to_7_operand" "")
                     (match_operand 9 "const_12_to_15_operand" "")
                     (match_operand 10 "const_12_to_15_operand" "")])))]
  "TARGET_AVX
   && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
       && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
{
  /* Pack the four selectors back into one immediate, two bits each;
     operands 5 and 6 are rebased from 8..11 down to 0..3 first.  */
  int imm = (INTVAL (operands[3])
             | (INTVAL (operands[4]) << 2)
             | ((INTVAL (operands[5]) - 8) << 4)
             | ((INTVAL (operands[6]) - 8) << 6));

  operands[3] = GEN_INT (imm);
  return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
3787
;; Expand a 128-bit shufps given its 8-bit immediate (operand 3).  The
;; immediate is split into four 2-bit selectors which are handed to
;; sse_shufps_v4sf as explicit element indices: the first two index
;; operand 1 (0..3), the last two index operand 2 (rebased to 4..7).
(define_expand "sse_shufps"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand:V4SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE"
{
  int imm = INTVAL (operands[3]);
  /* The four 2-bit fields of the immediate, lowest field first.  */
  int sel0 = (imm >> 0) & 3;
  int sel1 = (imm >> 2) & 3;
  int sel2 = (imm >> 4) & 3;
  int sel3 = (imm >> 6) & 3;

  emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
                                  GEN_INT (sel0),
                                  GEN_INT (sel1),
                                  GEN_INT (sel2 + 4),
                                  GEN_INT (sel3 + 4)));
  DONE;
})
3803
;; Three-operand AVX vshufps over the SSEMODE4S modes, with the selection
;; given as four element indices into {operand 1, operand 2}: operands 3,4
;; index operand 1 (0..3) and operands 5,6 index operand 2 (4..7).  The
;; output code folds the four indices into the instruction's immediate.
(define_insn "*avx_shufps_<mode>"
  [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
        (vec_select:SSEMODE4S
          (vec_concat:<ssedoublesizemode>
            (match_operand:SSEMODE4S 1 "register_operand" "x")
            (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")
                     (match_operand 6 "const_4_to_7_operand" "")])))]
  "TARGET_AVX"
{
  /* Two bits per selector; operands 5 and 6 are rebased from 4..7
     down to 0..3 before being packed.  */
  int imm = ((INTVAL (operands[3]) << 0)
             | (INTVAL (operands[4]) << 2)
             | ((INTVAL (operands[5]) - 4) << 4)
             | ((INTVAL (operands[6]) - 4) << 6));

  operands[3] = GEN_INT (imm);
  return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])
3829
;; Two-operand SSE shufps over the SSEMODE4S modes; operand 1 is tied to
;; the destination.  The selection is given as four element indices into
;; {operand 1, operand 2}: operands 3,4 index operand 1 (0..3) and
;; operands 5,6 index operand 2 (4..7).  The output code folds the four
;; indices into the instruction's immediate.
(define_insn "sse_shufps_<mode>"
  [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
        (vec_select:SSEMODE4S
          (vec_concat:<ssedoublesizemode>
            (match_operand:SSEMODE4S 1 "register_operand" "0")
            (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")
                     (match_operand 6 "const_4_to_7_operand" "")])))]
  "TARGET_SSE"
{
  /* Two bits per selector; operands 5 and 6 are rebased from 4..7
     down to 0..3 before being packed.  */
  int imm = ((INTVAL (operands[3]) << 0)
             | (INTVAL (operands[4]) << 2)
             | ((INTVAL (operands[5]) - 4) << 4)
             | ((INTVAL (operands[6]) - 4) << 6));

  operands[3] = GEN_INT (imm);
  return "shufps\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "V4SF")])
3854
;; Extract elements 2,3 of a V4SF (operand 1) as a V2SF.  Alternatives:
;;   0: register -> memory             : movhps store
;;   1: register -> register           : movhlps
;;   2: offsettable memory -> register : movlps reading %H1
;; Shared by SSE and AVX (prefix "maybe_vex", templates start with %v).
3855 (define_insn "sse_storehps"
3856 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3857 (vec_select:V2SF
3858 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3859 (parallel [(const_int 2) (const_int 3)])))]
3860 "TARGET_SSE"
3861 "@
3862 %vmovhps\t{%1, %0|%0, %1}
3863 %vmovhlps\t{%1, %d0|%d0, %1}
3864 %vmovlps\t{%H1, %d0|%d0, %H1}"
3865 [(set_attr "type" "ssemov")
3866 (set_attr "prefix" "maybe_vex")
3867 (set_attr "mode" "V2SF,V4SF,V2SF")])
3868
;; Expander for loadhps: the result keeps elements 0,1 of operand 1 as
;; its low half and takes the V2SF operand 2 as its high half.  The
;; preparation statement runs ix86_fixup_binary_operands on the operands.
3869 (define_expand "sse_loadhps_exp"
3870 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3871 (vec_concat:V4SF
3872 (vec_select:V2SF
3873 (match_operand:V4SF 1 "nonimmediate_operand" "")
3874 (parallel [(const_int 0) (const_int 1)]))
3875 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3876 "TARGET_SSE"
3877 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
3878
;; AVX (prefix "vex") form of loadhps: low half = elements 0,1 of
;; operand 1, high half = V2SF operand 2.  Alternatives:
;;   0: operand 2 in memory           -> vmovhps load
;;   1: operand 2 in a register       -> vmovlhps
;;   2: offsettable memory destination, operand 1 tied to it
;;                                    -> vmovlps store to %H0
3879 (define_insn "*avx_loadhps"
3880 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3881 (vec_concat:V4SF
3882 (vec_select:V2SF
3883 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3884 (parallel [(const_int 0) (const_int 1)]))
3885 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3886 "TARGET_AVX"
3887 "@
3888 vmovhps\t{%2, %1, %0|%0, %1, %2}
3889 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3890 vmovlps\t{%2, %H0|%H0, %2}"
3891 [(set_attr "type" "ssemov")
3892 (set_attr "prefix" "vex")
3893 (set_attr "mode" "V2SF,V4SF,V2SF")])
3894
;; Two-operand SSE form of loadhps: low half = elements 0,1 of operand 1
;; (tied to the destination in every alternative), high half = V2SF
;; operand 2.  Alternatives: memory source -> movhps; register source ->
;; movlhps; offsettable memory destination -> movlps store to %H0.
3895 (define_insn "sse_loadhps"
3896 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3897 (vec_concat:V4SF
3898 (vec_select:V2SF
3899 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3900 (parallel [(const_int 0) (const_int 1)]))
3901 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3902 "TARGET_SSE"
3903 "@
3904 movhps\t{%2, %0|%0, %2}
3905 movlhps\t{%2, %0|%0, %2}
3906 movlps\t{%2, %H0|%H0, %2}"
3907 [(set_attr "type" "ssemov")
3908 (set_attr "mode" "V2SF,V4SF,V2SF")])
3909
;; AVX (prefix "vex") extraction of elements 0,1 of a V4SF (operand 1)
;; as a V2SF.  Alternatives:
;;   0: register -> memory   : vmovlps store
;;   1: register -> register : vmovaps (whole-register copy)
;;   2: memory -> register   : vmovlps load, merging into %0 itself
;; NOTE(review): alternative 1 is tagged mode V2DF although it emits
;; vmovaps; the non-AVX sse_storelps tags the same alternative V4SF.
;; Confirm whether the difference is intentional.
3910 (define_insn "*avx_storelps"
3911 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3912 (vec_select:V2SF
3913 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3914 (parallel [(const_int 0) (const_int 1)])))]
3915 "TARGET_AVX"
3916 "@
3917 vmovlps\t{%1, %0|%0, %1}
3918 vmovaps\t{%1, %0|%0, %1}
3919 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3920 [(set_attr "type" "ssemov")
3921 (set_attr "prefix" "vex")
3922 (set_attr "mode" "V2SF,V2DF,V2SF")])
3923
;; SSE extraction of elements 0,1 of a V4SF (operand 1) as a V2SF.
;; Alternatives: register -> memory uses a movlps store; register ->
;; register uses movaps; memory -> register uses a movlps load.
3924 (define_insn "sse_storelps"
3925 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3926 (vec_select:V2SF
3927 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3928 (parallel [(const_int 0) (const_int 1)])))]
3929 "TARGET_SSE"
3930 "@
3931 movlps\t{%1, %0|%0, %1}
3932 movaps\t{%1, %0|%0, %1}
3933 movlps\t{%1, %0|%0, %1}"
3934 [(set_attr "type" "ssemov")
3935 (set_attr "mode" "V2SF,V4SF,V2SF")])
3936
;; Expander for loadlps: the result takes the V2SF operand 2 as its low
;; half and keeps elements 2,3 of operand 1 as its high half.  The
;; preparation statement runs ix86_fixup_binary_operands on the operands.
3937 (define_expand "sse_loadlps_exp"
3938 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3939 (vec_concat:V4SF
3940 (match_operand:V2SF 2 "nonimmediate_operand" "")
3941 (vec_select:V2SF
3942 (match_operand:V4SF 1 "nonimmediate_operand" "")
3943 (parallel [(const_int 2) (const_int 3)]))))]
3944 "TARGET_SSE"
3945 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
3946
;; AVX (prefix "vex") form of loadlps: low half = V2SF operand 2, high
;; half = elements 2,3 of operand 1.  Alternatives:
;;   0: all registers -> vshufps $0xe4 with %2 as first source and %1 as
;;      second source; 0xe4 picks elements 0,1 from the first source and
;;      elements 2,3 from the second
;;   1: operand 2 in memory -> vmovlps load merged with %1
;;   2: memory destination, operand 1 tied to it -> vmovlps store
;; Alternative 0 has to use the VEX mnemonic "vshufps": the template
;; supplies three register operands and the insn is tagged prefix "vex",
;; neither of which matches the two-operand legacy "shufps" spelling.
(define_insn "*avx_loadlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "@
   vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
   vmovlps\t{%2, %1, %0|%0, %1, %2}
   vmovlps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "length_immediate" "1,*,*")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF,V2SF,V2SF")])
3963
;; Two-operand SSE form of loadlps: low half = V2SF operand 2, high half
;; = elements 2,3 of operand 1.  Alternatives:
;;   0: operand 2 tied to the destination, operand 1 in a register
;;      -> shufps $0xe4 with %1 as source
;;   1: operand 1 tied, operand 2 in memory -> movlps load
;;   2: memory destination with operand 1 tied -> movlps store
3964 (define_insn "sse_loadlps"
3965 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3966 (vec_concat:V4SF
3967 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3968 (vec_select:V2SF
3969 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3970 (parallel [(const_int 2) (const_int 3)]))))]
3971 "TARGET_SSE"
3972 "@
3973 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3974 movlps\t{%2, %0|%0, %2}
3975 movlps\t{%2, %0|%0, %2}"
3976 [(set_attr "type" "sselog,ssemov,ssemov")
3977 (set_attr "length_immediate" "1,*,*")
3978 (set_attr "mode" "V4SF,V2SF,V2SF")])
3979
;; Three-operand AVX vmovss between registers.  vec_merge with mask 1:
;; element 0 of the result comes from operand 2, the remaining elements
;; from operand 1.
3980 (define_insn "*avx_movss"
3981 [(set (match_operand:V4SF 0 "register_operand" "=x")
3982 (vec_merge:V4SF
3983 (match_operand:V4SF 2 "register_operand" "x")
3984 (match_operand:V4SF 1 "register_operand" "x")
3985 (const_int 1)))]
3986 "TARGET_AVX"
3987 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3988 [(set_attr "type" "ssemov")
3989 (set_attr "prefix" "vex")
3990 (set_attr "mode" "SF")])
3991
;; Two-operand SSE movss between registers.  vec_merge with mask 1:
;; element 0 of the result comes from operand 2, the remaining elements
;; from operand 1, which is tied to the destination.
3992 (define_insn "sse_movss"
3993 [(set (match_operand:V4SF 0 "register_operand" "=x")
3994 (vec_merge:V4SF
3995 (match_operand:V4SF 2 "register_operand" "x")
3996 (match_operand:V4SF 1 "register_operand" "0")
3997 (const_int 1)))]
3998 "TARGET_SSE"
3999 "movss\t{%2, %0|%0, %2}"
4000 [(set_attr "type" "ssemov")
4001 (set_attr "mode" "SF")])
4002
;; Broadcast an SF register to all four V4SF elements under AVX, using
;; vshufps with immediate 0 and operand 1 as both sources.
4003 (define_insn "*vec_dupv4sf_avx"
4004 [(set (match_operand:V4SF 0 "register_operand" "=x")
4005 (vec_duplicate:V4SF
4006 (match_operand:SF 1 "register_operand" "x")))]
4007 "TARGET_AVX"
4008 "vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}"
4009 [(set_attr "type" "sselog1")
4010 (set_attr "length_immediate" "1")
4011 (set_attr "prefix" "vex")
4012 (set_attr "mode" "V4SF")])
4013
;; Broadcast an SF register to all four V4SF elements under SSE.  The
;; source is tied to the destination, so shufps with immediate 0 shuffles
;; the register with itself.
4014 (define_insn "*vec_dupv4sf"
4015 [(set (match_operand:V4SF 0 "register_operand" "=x")
4016 (vec_duplicate:V4SF
4017 (match_operand:SF 1 "register_operand" "0")))]
4018 "TARGET_SSE"
4019 "shufps\t{$0, %0, %0|%0, %0, 0}"
4020 [(set_attr "type" "sselog1")
4021 (set_attr "length_immediate" "1")
4022 (set_attr "mode" "V4SF")])
4023
;; Build a V2SF from two SF values under AVX.  Five alternatives:
;;   0: both in SSE registers           -> vunpcklps
;;   1: operand 2 in memory             -> vinsertps $0x10
;;   2: operand 1 in memory, operand 2 matching constraint "C" -> vmovss
;;   3: MMX registers (*y)              -> punpckldq
;;   4: MMX destination, operand 1 in memory, operand 2 "C" -> movd
;; The MMX alternatives (3 and 4) get prefix "orig"; the rest get "vex".
4024 (define_insn "*vec_concatv2sf_avx"
4025 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
4026 (vec_concat:V2SF
4027 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
4028 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
4029 "TARGET_AVX"
4030 "@
4031 vunpcklps\t{%2, %1, %0|%0, %1, %2}
4032 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
4033 vmovss\t{%1, %0|%0, %1}
4034 punpckldq\t{%2, %0|%0, %2}
4035 movd\t{%1, %0|%0, %1}"
4036 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
4037 (set_attr "length_immediate" "*,1,*,*,*")
4038 (set_attr "prefix_extra" "*,1,*,*,*")
4039 (set (attr "prefix")
4040 (if_then_else (eq_attr "alternative" "3,4")
4041 (const_string "orig")
4042 (const_string "vex")))
4043 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
4044
4045 ;; Although insertps takes register source, we prefer
4046 ;; unpcklps with register source since it is shorter.
;; Build a V2SF from two SF values when SSE4.1 is available.  Operand 1
;; is tied to the destination in the register alternatives (0, 1, 3).
;;   0: operand 2 in an SSE register -> unpcklps
;;   1: operand 2 in memory          -> insertps $0x10
;;   2: operand 1 in memory, operand 2 matching "C" -> movss
;;   3: MMX registers (*y)           -> punpckldq
;;   4: MMX destination, operand 1 in memory, operand 2 "C" -> movd
4047 (define_insn "*vec_concatv2sf_sse4_1"
4048 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
4049 (vec_concat:V2SF
4050 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
4051 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
4052 "TARGET_SSE4_1"
4053 "@
4054 unpcklps\t{%2, %0|%0, %2}
4055 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
4056 movss\t{%1, %0|%0, %1}
4057 punpckldq\t{%2, %0|%0, %2}
4058 movd\t{%1, %0|%0, %1}"
4059 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
4060 (set_attr "prefix_data16" "*,1,*,*,*")
4061 (set_attr "prefix_extra" "*,1,*,*,*")
4062 (set_attr "length_immediate" "*,1,*,*,*")
4063 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
4064
4065 ;; ??? In theory we can match memory for the MMX alternative, but allowing
4066 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
4067 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF values with plain SSE.  Operand 2 is limited
;; to a register or zero (reg_or_0_operand), so there is no memory form
;; for it.  Alternatives: SSE register pair -> unpcklps; operand 1 in
;; memory with operand 2 "C" -> movss; MMX register pair -> punpckldq;
;; MMX destination with operand 1 in memory and operand 2 "C" -> movd.
4068 (define_insn "*vec_concatv2sf_sse"
4069 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
4070 (vec_concat:V2SF
4071 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
4072 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
4073 "TARGET_SSE"
4074 "@
4075 unpcklps\t{%2, %0|%0, %2}
4076 movss\t{%1, %0|%0, %1}
4077 punpckldq\t{%2, %0|%0, %2}
4078 movd\t{%1, %0|%0, %1}"
4079 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
4080 (set_attr "mode" "V4SF,SF,DI,DI")])
4081
;; Build a V4SF from two V2SF halves under AVX: operand 1 is the low
;; half, operand 2 the high half.  A register operand 2 uses vmovlhps, a
;; memory operand 2 uses vmovhps.
4082 (define_insn "*vec_concatv4sf_avx"
4083 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4084 (vec_concat:V4SF
4085 (match_operand:V2SF 1 "register_operand" " x,x")
4086 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
4087 "TARGET_AVX"
4088 "@
4089 vmovlhps\t{%2, %1, %0|%0, %1, %2}
4090 vmovhps\t{%2, %1, %0|%0, %1, %2}"
4091 [(set_attr "type" "ssemov")
4092 (set_attr "prefix" "vex")
4093 (set_attr "mode" "V4SF,V2SF")])
4094
;; Build a V4SF from two V2SF halves under SSE: operand 1 (tied to the
;; destination) is the low half, operand 2 the high half.  A register
;; operand 2 uses movlhps, a memory operand 2 uses movhps.
4095 (define_insn "*vec_concatv4sf_sse"
4096 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
4097 (vec_concat:V4SF
4098 (match_operand:V2SF 1 "register_operand" " 0,0")
4099 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
4100 "TARGET_SSE"
4101 "@
4102 movlhps\t{%2, %0|%0, %2}
4103 movhps\t{%2, %0|%0, %2}"
4104 [(set_attr "type" "ssemov")
4105 (set_attr "mode" "V4SF,V2SF")])
4106
;; vec_init expander for every mode in SSEMODE.  All the work is handed
;; to ix86_expand_vector_init, called with a first argument of false,
;; the destination register (operand 0) and operand 1.
4107 (define_expand "vec_init<mode>"
4108 [(match_operand:SSEMODE 0 "register_operand" "")
4109 (match_operand 1 "" "")]
4110 "TARGET_SSE"
4111 {
4112 ix86_expand_vector_init (false, operands[0], operands[1]);
4113 DONE;
4114 })
4115
;; AVX: set element 0 of a V4SF.  vec_merge with mask 1 takes element 0
;; from the duplicated SF operand 2 and the other elements from operand 1.
;;   0: register operands            -> three-operand vmovss
;;   1: operand 2 in memory, operand 1 "C" -> vmovss load
;;   2: operand 2 in a general register, operand 1 "C" -> vmovd
;;   3: memory destination tied to operand 1 -> "#", no direct template;
;;      presumably handled by a define_split after reload -- confirm.
4116 (define_insn "*vec_setv4sf_0_avx"
4117 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,m")
4118 (vec_merge:V4SF
4119 (vec_duplicate:V4SF
4120 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
4121 (match_operand:V4SF 1 "vector_move_operand" " x,C,C ,0")
4122 (const_int 1)))]
4123 "TARGET_AVX"
4124 "@
4125 vmovss\t{%2, %1, %0|%0, %1, %2}
4126 vmovss\t{%2, %0|%0, %2}
4127 vmovd\t{%2, %0|%0, %2}
4128 #"
4129 [(set_attr "type" "ssemov")
4130 (set_attr "prefix" "vex")
4131 (set_attr "mode" "SF")])
4132
;; SSE: set element 0 of a V4SF.  vec_merge with mask 1 takes element 0
;; from the duplicated SF operand 2 and the other elements from operand 1.
;;   0: register operands, operand 1 tied to the destination -> movss
;;   1: operand 2 in memory, operand 1 "C" -> movss load
;;   2: operand 2 in a general register, operand 1 "C", destination
;;      constraint Y2 -> movd
;;   3: memory destination tied to operand 1 -> "#", no direct template;
;;      presumably handled by a define_split after reload -- confirm.
4133 (define_insn "vec_setv4sf_0"
4134 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
4135 (vec_merge:V4SF
4136 (vec_duplicate:V4SF
4137 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
4138 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
4139 (const_int 1)))]
4140 "TARGET_SSE"
4141 "@
4142 movss\t{%2, %0|%0, %2}
4143 movss\t{%2, %0|%0, %2}
4144 movd\t{%2, %0|%0, %2}
4145 #"
4146 [(set_attr "type" "ssemov")
4147 (set_attr "mode" "SF")])
4148
4149 ;; A subset is vec_setv4sf.
;; AVX: insert the SF operand 2 into one element of V4SF operand 1.
;; Operand 3 is the vec_merge mask, a single power of two in 1..8.  The
;; output code converts it to an element number with exact_log2 and
;; shifts that left by 4 to form the vinsertps immediate.
(define_insn "*vec_setv4sf_avx"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 2 "nonimmediate_operand" "xm"))
          (match_operand:V4SF 1 "register_operand" "x")
          (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
  "TARGET_AVX"
{
  /* Mask bit -> element number -> immediate.  */
  int elt = exact_log2 (INTVAL (operands[3]));

  operands[3] = GEN_INT (elt << 4);
  return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])
4167
;; SSE4.1: insert the SF operand 2 into one element of V4SF operand 1,
;; which is tied to the destination.  Operand 3 is the vec_merge mask, a
;; single power of two in 1..8.  The output code converts it to an
;; element number with exact_log2 and shifts that left by 4 to form the
;; insertps immediate.
(define_insn "*vec_setv4sf_sse4_1"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 2 "nonimmediate_operand" "xm"))
          (match_operand:V4SF 1 "register_operand" "0")
          (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
  "TARGET_SSE4_1"
{
  /* Mask bit -> element number -> immediate.  */
  int elt = exact_log2 (INTVAL (operands[3]));

  operands[3] = GEN_INT (elt << 4);
  return "insertps\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "V4SF")])
4185
;; AVX vinsertps expressed as an UNSPEC_INSERTPS of (operand 2, operand 1,
;; immediate operand 3).  Operand 2 may be a register or memory; operand 3
;; is an immediate in 0..255 passed straight through to the instruction.
(define_insn "*avx_insertps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
           (match_operand:V4SF 1 "register_operand" "x")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_INSERTPS))]
  "TARGET_AVX"
  "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "V4SF")])
4199
;; SSE4.1 insertps expressed as an UNSPEC_INSERTPS of (operand 2,
;; operand 1, immediate operand 3).  Operand 1 is tied to the destination
;; and operand 2 must be a register; operand 3 is an immediate in 0..255
;; passed straight through to the instruction.
(define_insn "sse4_1_insertps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 2 "register_operand" "x")
           (match_operand:V4SF 1 "register_operand" "0")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_INSERTPS))]
  "TARGET_SSE4_1"
  "insertps\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "V4SF")])
4213
;; After reload, setting element 0 of a V4SF that lives in memory (the
;; vec_merge's second input is the destination itself, mask 1) becomes a
;; plain SFmode move of operand 1 to offset 0 of that memory.
(define_split
  [(set (match_operand:V4SF 0 "memory_operand" "")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 1 "nonmemory_operand" ""))
          (match_dup 0)
          (const_int 1)))]
  "TARGET_SSE && reload_completed"
  [(const_int 0)]
{
  /* SFmode view of the first element of the destination.  */
  rtx elt0_mem = adjust_address (operands[0], SFmode, 0);

  emit_move_insn (elt0_mem, operands[1]);
  DONE;
})
4227
;; vec_set expander for every mode in SSEMODE: store scalar operand 1
;; into element number operand 2 of vector operand 0.  The work is done
;; by ix86_expand_vector_set, called with a first argument of false.
(define_expand "vec_set<mode>"
  [(match_operand:SSEMODE 0 "register_operand" "")
   (match_operand:<ssescalarmode> 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx vec = operands[0];
  rtx scalar = operands[1];

  ix86_expand_vector_set (false, vec, scalar, INTVAL (operands[2]));
  DONE;
})
4238
;; Extract element 0 of a V4SF as an SF.  There is no direct template
;; ("#"); after reload the insn is split into an ordinary SFmode move
;; whose source is an SFmode view of operand 1: the same hard register
;; number when operand 1 is a register, gen_lowpart of it otherwise.
;; The insn condition rejects a memory-to-memory form.
(define_insn_and_split "*vec_extractv4sf_0"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
        (vec_select:SF
          (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx vec = operands[1];
  rtx scalar_src = (REG_P (vec)
                    ? gen_rtx_REG (SFmode, REGNO (vec))
                    : gen_lowpart (SFmode, vec));

  emit_move_insn (operands[0], scalar_src);
  DONE;
})
4257
;; Expander for vextractf128 over every AVX256MODE mode.  Operand 2 (0 or
;; 1) picks which half of operand 1 to extract: 0 dispatches to
;; vec_extract_lo_<mode>, 1 to vec_extract_hi_<mode>.  Any other value is
;; unreachable because of the const_0_to_1_operand predicate.
(define_expand "avx_vextractf128<mode>"
  [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
   (match_operand:AVX256MODE 1 "register_operand" "")
   (match_operand:SI 2 "const_0_to_1_operand" "")]
  "TARGET_AVX"
{
  HOST_WIDE_INT half = INTVAL (operands[2]);

  if (half == 0)
    emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
  else if (half == 1)
    emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
  else
    gcc_unreachable ();
  DONE;
})
4277
;; Extract the low half (elements 0,1) of a four-element 256-bit vector
;; (AVX256MODE4P) into a register or memory, using vextractf128 with
;; immediate 0.
4278 (define_insn "vec_extract_lo_<mode>"
4279 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4280 (vec_select:<avxhalfvecmode>
4281 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4282 (parallel [(const_int 0) (const_int 1)])))]
4283 "TARGET_AVX"
4284 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4285 [(set_attr "type" "sselog")
4286 (set_attr "prefix_extra" "1")
4287 (set_attr "length_immediate" "1")
4288 (set_attr "memory" "none,store")
4289 (set_attr "prefix" "vex")
4290 (set_attr "mode" "V8SF")])
4291
;; Extract the high half (elements 2,3) of a four-element 256-bit vector
;; (AVX256MODE4P) into a register or memory, using vextractf128 with
;; immediate 1.
4292 (define_insn "vec_extract_hi_<mode>"
4293 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4294 (vec_select:<avxhalfvecmode>
4295 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4296 (parallel [(const_int 2) (const_int 3)])))]
4297 "TARGET_AVX"
4298 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4299 [(set_attr "type" "sselog")
4300 (set_attr "prefix_extra" "1")
4301 (set_attr "length_immediate" "1")
4302 (set_attr "memory" "none,store")
4303 (set_attr "prefix" "vex")
4304 (set_attr "mode" "V8SF")])
4305
;; Extract the low half (elements 0..3) of an eight-element 256-bit
;; vector (AVX256MODE8P) into a register or memory.  The low 128-bit lane
;; is lane 0, so the vextractf128 immediate must be 0; an immediate of 1
;; would return elements 4..7 and contradict the RTL pattern.
(define_insn "vec_extract_lo_<mode>"
  [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
        (vec_select:<avxhalfvecmode>
          (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "TARGET_AVX"
  "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
4320
;; Extract the high half (elements 4..7) of an eight-element 256-bit
;; vector (AVX256MODE8P) into a register or memory, using vextractf128
;; with immediate 1.
4321 (define_insn "vec_extract_hi_<mode>"
4322 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4323 (vec_select:<avxhalfvecmode>
4324 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4325 (parallel [(const_int 4) (const_int 5)
4326 (const_int 6) (const_int 7)])))]
4327 "TARGET_AVX"
4328 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4329 [(set_attr "type" "sselog")
4330 (set_attr "prefix_extra" "1")
4331 (set_attr "length_immediate" "1")
4332 (set_attr "memory" "none,store")
4333 (set_attr "prefix" "vex")
4334 (set_attr "mode" "V8SF")])
4335
;; Extract the low half (elements 0..7) of a V16HI into a V8HI register
;; or memory.  The low 128-bit lane is lane 0, so the vextractf128
;; immediate must be 0; an immediate of 1 would return elements 8..15 and
;; contradict the RTL pattern.
(define_insn "vec_extract_lo_v16hi"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
        (vec_select:V8HI
          (match_operand:V16HI 1 "register_operand" "x,x")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "TARGET_AVX"
  "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
4352
;; Extract the high half (elements 8..15) of a V16HI into a V8HI register
;; or memory, using vextractf128 with immediate 1.
4353 (define_insn "vec_extract_hi_v16hi"
4354 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4355 (vec_select:V8HI
4356 (match_operand:V16HI 1 "register_operand" "x,x")
4357 (parallel [(const_int 8) (const_int 9)
4358 (const_int 10) (const_int 11)
4359 (const_int 12) (const_int 13)
4360 (const_int 14) (const_int 15)])))]
4361 "TARGET_AVX"
4362 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4363 [(set_attr "type" "sselog")
4364 (set_attr "prefix_extra" "1")
4365 (set_attr "length_immediate" "1")
4366 (set_attr "memory" "none,store")
4367 (set_attr "prefix" "vex")
4368 (set_attr "mode" "V8SF")])
4369
;; Extract the low half (elements 0..15) of a V32QI into a V16QI register
;; or memory.  The low 128-bit lane is lane 0, so the vextractf128
;; immediate must be 0; an immediate of 1 would return elements 16..31
;; and contradict the RTL pattern.
(define_insn "vec_extract_lo_v32qi"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
        (vec_select:V16QI
          (match_operand:V32QI 1 "register_operand" "x,x")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)])))]
  "TARGET_AVX"
  "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
4390
;; Extract the high half (elements 16..31) of a V32QI into a V16QI
;; register or memory, using vextractf128 with immediate 1.
4391 (define_insn "vec_extract_hi_v32qi"
4392 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4393 (vec_select:V16QI
4394 (match_operand:V32QI 1 "register_operand" "x,x")
4395 (parallel [(const_int 16) (const_int 17)
4396 (const_int 18) (const_int 19)
4397 (const_int 20) (const_int 21)
4398 (const_int 22) (const_int 23)
4399 (const_int 24) (const_int 25)
4400 (const_int 26) (const_int 27)
4401 (const_int 28) (const_int 29)
4402 (const_int 30) (const_int 31)])))]
4403 "TARGET_AVX"
4404 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4405 [(set_attr "type" "sselog")
4406 (set_attr "prefix_extra" "1")
4407 (set_attr "length_immediate" "1")
4408 (set_attr "memory" "none,store")
4409 (set_attr "prefix" "vex")
4410 (set_attr "mode" "V8SF")])
4411
;; Pull one SF element, chosen by a constant index 0..3 (operand 2), out of
;; a V4SF register into a general register or memory.
(define_insn "*sse4_1_extractps"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
        (vec_select:SF
          (match_operand:V4SF 1 "register_operand" "x")
          (parallel
            [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
  "TARGET_SSE4_1"
  "%vextractps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V4SF")])
4425
;; Element extraction from a V4SF that lives in offsettable memory.
;; After reload this is split into a plain SFmode move from the address of
;; the selected element (element index * 4 bytes).
(define_insn_and_split "*vec_extract_v4sf_mem"
  [(set (match_operand:SF 0 "register_operand" "=x*rf")
        (vec_select:SF
          (match_operand:V4SF 1 "memory_operand" "o")
          (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
{
  int elt = INTVAL (operands[2]);
  rtx elt_mem = adjust_address (operands[1], SFmode, elt * 4);

  emit_move_insn (operands[0], elt_mem);
  DONE;
})
4441
;; Generic element extraction for every 16-byte SSE vector mode.
;; Operand 0 receives the scalar, operand 1 is the vector and operand 2 the
;; constant element number; the work is delegated to
;; ix86_expand_vector_extract.
(define_expand "vec_extract<mode>"
  [(match_operand:<ssescalarmode> 0 "register_operand" "")
   (match_operand:SSEMODE 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx dest = operands[0];
  rtx vec = operands[1];

  ix86_expand_vector_extract (false, dest, vec, INTVAL (operands[2]));
  DONE;
})
4452
4453 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4454 ;;
4455 ;; Parallel double-precision floating point element swizzling
4456 ;;
4457 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4458
;; 256-bit vunpckhpd.  From the 8-element concatenation of operands 1 and 2
;; the result takes elements 1, 5, 3, 7, i.e. the odd element of each
;; operand within each 128-bit half.
(define_insn "avx_unpckhpd256"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (parallel
            [(const_int 1) (const_int 5) (const_int 3) (const_int 7)])))]
  "TARGET_AVX"
  "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
4472
;; Expander for the V2DF "unpack high" operation: result is element 1 of
;; operand 1 followed by element 1 of operand 2.  Operands are first
;; legitimized through ix86_fixup_binary_operands.
(define_expand "sse2_unpckhpd_exp"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" "")
            (match_operand:V2DF 2 "nonimmediate_operand" ""))
          (parallel [(const_int 1) (const_int 3)])))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
})
4483
;; AVX form of the V2DF "unpack high" selection (elements 1 and 3 of the
;; concatenation).  Three alternatives:
;;   0: both inputs in registers            -> vunpckhpd
;;   1: operand 1 in offsettable memory     -> vmovlpd from %H1
;;   2: destination is memory, operand 2 is
;;      tied to it                          -> vmovhpd store of operand 1
;; The condition rejects the case where both inputs are memory.
(define_insn "*avx_unpckhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " x,o,x")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,x,0"))
          (parallel [(const_int 1) (const_int 3)])))]
  "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   vunpckhpd\t{%2, %1, %0|%0, %1, %2}
   vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
   vmovhpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V2DF,V1DF,V1DF")])
4500
;; SSE2 (two-operand) form of the V2DF "unpack high" selection.
;; Alternatives mirror the AVX pattern, but the destination register is
;; tied to one of the inputs:
;;   0: operand 1 tied to the destination   -> unpckhpd
;;   1: operand 1 in offsettable memory,
;;      operand 2 tied to the destination   -> movlpd from %H1
;;   2: memory destination tied to operand 2 -> movhpd store of operand 1
(define_insn "sse2_unpckhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
          (parallel [(const_int 1) (const_int 3)])))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   unpckhpd\t{%2, %0|%0, %2}
   movlpd\t{%H1, %0|%0, %H1}
   movhpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "prefix_data16" "*,1,1")
   (set_attr "mode" "V2DF,V1DF,V1DF")])
4517
;; 256-bit vmovddup: duplicate the even-indexed double of each 128-bit half,
;; i.e. the result is { s0, s0, s2, s2 } for source { s0, s1, s2, s3 }.
;;
;; The source is concatenated with itself, so index i and index i+4 both
;; name element i.  The selector therefore has to be 0, 4, 2, 6.  The
;; previous selector 0, 2, 4, 6 described { s0, s2, s0, s2 }, which does not
;; match what the emitted instruction computes and could let RTL
;; optimizers draw wrong conclusions about the result.
(define_insn "avx_movddup256"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 2) (const_int 6)])))]
  "TARGET_AVX"
  "vmovddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
4531
;; AVX V2DF duplicate-low: operand 1 concatenated with itself, selecting
;; elements 0 and 2 (both are element 0 of operand 1).
;; Alternative 0 emits vmovddup; alternative 1 (register source, offsettable
;; memory destination) emits "#" and is left for a splitter.
(define_insn "*avx_movddup"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   vmovddup\t{%1, %0|%0, %1}
   #"
  [(set_attr "type" "sselog1,ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V2DF")])
4547
;; SSE3 V2DF duplicate-low: same RTL shape as the AVX variant (elements 0
;; and 2 of operand 1 concatenated with itself).
;; Alternative 0 emits movddup; alternative 1 (memory destination) emits
;; "#" and is left for a splitter.
(define_insn "*sse3_movddup"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movddup\t{%1, %0|%0, %1}
   #"
  [(set_attr "type" "sselog1,ssemov")
   (set_attr "mode" "V2DF")])
4562
;; Post-reload split of "duplicate low double" with a memory destination:
;; the DFmode view of the source register is stored twice, at byte offsets
;; 0 and 8 of the destination.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "register_operand" "")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_SSE3 && reload_completed"
  [(const_int 0)]
{
  rtx low_df = gen_rtx_REG (DFmode, REGNO (operands[1]));
  int byte_off;

  /* One DFmode store per destination element, low element first.  */
  for (byte_off = 0; byte_off < 16; byte_off += 8)
    emit_move_insn (adjust_address (operands[0], DFmode, byte_off), low_df);
  DONE;
})
4579
;; 256-bit vunpcklpd.  From the 8-element concatenation of operands 1 and 2
;; the result takes elements 0, 4, 2, 6, i.e. the even element of each
;; operand within each 128-bit half.
(define_insn "avx_unpcklpd256"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (parallel
            [(const_int 0) (const_int 4) (const_int 2) (const_int 6)])))]
  "TARGET_AVX"
  "vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
4593
;; Expander for the V2DF "unpack low" operation: result is element 0 of
;; operand 1 followed by element 0 of operand 2.  Operands are first
;; legitimized through ix86_fixup_binary_operands.
(define_expand "sse2_unpcklpd_exp"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" "")
            (match_operand:V2DF 2 "nonimmediate_operand" ""))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
})
4604
;; AVX form of the V2DF "unpack low" selection (elements 0 and 2 of the
;; concatenation).  Three alternatives:
;;   0: both inputs in registers          -> vunpcklpd
;;   1: operand 2 in memory               -> vmovhpd load
;;   2: offsettable memory destination
;;      tied to operand 1                 -> vmovlpd store of operand 2 to %H0
(define_insn "*avx_unpcklpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   vmovhpd\t{%2, %1, %0|%0, %1, %2}
   vmovlpd\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V2DF,V1DF,V1DF")])
4621
;; SSE2 (two-operand) form of the V2DF "unpack low" selection.  Operand 1 is
;; tied to the destination in every alternative:
;;   0: operand 2 in a register  -> unpcklpd
;;   1: operand 2 in memory      -> movhpd load
;;   2: memory destination       -> movlpd store of operand 2 to %H0
(define_insn "sse2_unpcklpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   movhpd\t{%2, %0|%0, %2}
   movlpd\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "prefix_data16" "*,1,1")
   (set_attr "mode" "V2DF,V1DF,V1DF")])
4638
;; Expander taking the 4-bit shuffle immediate (operand 3) and turning each
;; bit into an explicit element index for avx_shufpd256_1:
;;   bit 0 -> element 0 or 1      bit 1 -> element 4 or 5
;;   bit 2 -> element 2 or 3      bit 3 -> element 6 or 7
(define_expand "avx_shufpd256"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V4DF 1 "register_operand" "")
   (match_operand:V4DF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_AVX"
{
  int imm = INTVAL (operands[3]);
  rtx sel0 = GEN_INT (imm & 1);
  rtx sel1 = GEN_INT ((imm & 2) ? 5 : 4);
  rtx sel2 = GEN_INT ((imm & 4) ? 3 : 2);
  rtx sel3 = GEN_INT ((imm & 8) ? 7 : 6);

  emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
                                  sel0, sel1, sel2, sel3));
  DONE;
})
4654
;; 256-bit vshufpd with the selection spelled out as four element indices
;; (operands 3..6).  At output time the indices are folded back into the
;; 4-bit immediate, which replaces operand 3 for printing.
(define_insn "avx_shufpd256_1"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_1_operand" "")
                     (match_operand 4 "const_4_to_5_operand" "")
                     (match_operand 5 "const_2_to_3_operand" "")
                     (match_operand 6 "const_6_to_7_operand" "")])))]
  "TARGET_AVX"
{
  /* Each index contributes one bit: its offset from the lowest value its
     predicate allows.  */
  int imm = (INTVAL (operands[3])
             | ((INTVAL (operands[4]) - 4) << 1)
             | ((INTVAL (operands[5]) - 2) << 2)
             | ((INTVAL (operands[6]) - 6) << 3));

  operands[3] = GEN_INT (imm);
  return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
4680
;; Expander taking the 2-bit shufpd immediate (operand 3) and turning it
;; into explicit element indices for sse2_shufpd_v2df:
;;   bit 0 -> element 0 or 1,  bit 1 -> element 2 or 3.
(define_expand "sse2_shufpd"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[3]);
  rtx sel_lo = GEN_INT (imm & 1);
  rtx sel_hi = GEN_INT ((imm & 2) ? 3 : 2);

  emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
                                   sel_lo, sel_hi));
  DONE;
})
4694
;; Even-element extraction for the four-element SSEMODE4S modes: elements
;; 0, 2, 4, 6 of the concatenation of operands 1 and 2.
(define_expand "vec_extract_even<mode>"
  [(set (match_operand:SSEMODE4S 0 "register_operand" "")
        (vec_select:SSEMODE4S
          (vec_concat:<ssedoublesizemode>
            (match_operand:SSEMODE4S 1 "register_operand" "")
            (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
          (parallel
            [(const_int 0) (const_int 2) (const_int 4) (const_int 6)])))]
  "TARGET_SSE")
4706
;; Odd-element extraction for the four-element SSEMODE4S modes: elements
;; 1, 3, 5, 7 of the concatenation of operands 1 and 2.
(define_expand "vec_extract_odd<mode>"
  [(set (match_operand:SSEMODE4S 0 "register_operand" "")
        (vec_select:SSEMODE4S
          (vec_concat:<ssedoublesizemode>
            (match_operand:SSEMODE4S 1 "register_operand" "")
            (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
          (parallel
            [(const_int 1) (const_int 3) (const_int 5) (const_int 7)])))]
  "TARGET_SSE")
4718
;; Even-element extraction for the two-element SSEMODE2D modes: elements
;; 0 and 2 of the concatenation of operands 1 and 2.
(define_expand "vec_extract_even<mode>"
  [(set (match_operand:SSEMODE2D 0 "register_operand" "")
        (vec_select:SSEMODE2D
          (vec_concat:<ssedoublesizemode>
            (match_operand:SSEMODE2D 1 "register_operand" "")
            (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_SSE2")
4728
;; Odd-element extraction for the two-element SSEMODE2D modes: elements
;; 1 and 3 of the concatenation of operands 1 and 2.
(define_expand "vec_extract_odd<mode>"
  [(set (match_operand:SSEMODE2D 0 "register_operand" "")
        (vec_select:SSEMODE2D
          (vec_concat:<ssedoublesizemode>
            (match_operand:SSEMODE2D 1 "register_operand" "")
            (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
          (parallel [(const_int 1) (const_int 3)])))]
  "TARGET_SSE2")
4738
4739 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX three-operand vpunpckhqdq on V2DI: elements 1 and 3 of the
;; concatenation of operands 1 and 2.
(define_insn "*avx_punpckhqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_select:V2DI
          (vec_concat:V4DI
            (match_operand:V2DI 1 "register_operand" "x")
            (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 1) (const_int 3)])))]
  "TARGET_AVX"
  "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
4753
;; SSE2 two-operand punpckhqdq on V2DI: elements 1 and 3 of the
;; concatenation; operand 1 is tied to the destination register.
(define_insn "sse2_punpckhqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_select:V2DI
          (vec_concat:V4DI
            (match_operand:V2DI 1 "register_operand" "0")
            (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 1) (const_int 3)])))]
  "TARGET_SSE2"
  "punpckhqdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
4767
;; AVX three-operand vpunpcklqdq on V2DI: elements 0 and 2 of the
;; concatenation of operands 1 and 2.
(define_insn "*avx_punpcklqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_select:V2DI
          (vec_concat:V4DI
            (match_operand:V2DI 1 "register_operand" "x")
            (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_AVX"
  "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
4781
;; SSE2 two-operand punpcklqdq on V2DI: elements 0 and 2 of the
;; concatenation; operand 1 is tied to the destination register.
(define_insn "sse2_punpcklqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_select:V2DI
          (vec_concat:V4DI
            (match_operand:V2DI 1 "register_operand" "0")
            (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_SSE2"
  "punpcklqdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
4795
;; AVX three-operand vshufpd for the two-element modes.  Operand 3 picks
;; element 0 or 1 (from operand 1), operand 4 picks element 2 or 3 (from
;; operand 2).  The two indices are folded into the 2-bit immediate, which
;; replaces operand 3 for printing.
(define_insn "*avx_shufpd_<mode>"
  [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
        (vec_select:SSEMODE2D
          (vec_concat:<ssedoublesizemode>
            (match_operand:SSEMODE2D 1 "register_operand" "x")
            (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_1_operand" "")
                     (match_operand 4 "const_2_to_3_operand" "")])))]
  "TARGET_AVX"
{
  int imm = INTVAL (operands[3]) | ((INTVAL (operands[4]) - 2) << 1);

  operands[3] = GEN_INT (imm);
  return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V2DF")])
4817
;; SSE2 two-operand shufpd for the two-element modes; operand 1 is tied to
;; the destination.  Operand 3 picks element 0 or 1, operand 4 picks element
;; 2 or 3; both are folded into the 2-bit immediate, which replaces
;; operand 3 for printing.
(define_insn "sse2_shufpd_<mode>"
  [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
        (vec_select:SSEMODE2D
          (vec_concat:<ssedoublesizemode>
            (match_operand:SSEMODE2D 1 "register_operand" "0")
            (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_1_operand" "")
                     (match_operand 4 "const_2_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[3]) | ((INTVAL (operands[4]) - 2) << 1);

  operands[3] = GEN_INT (imm);
  return "shufpd\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "V2DF")])
4838
4839 ;; Avoid combining registers from different units in a single alternative,
4840 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: extract element 1 (the high double) of a V2DF.
;;   0: register -> memory                 vmovhpd store
;;   1: register -> SSE register           vunpckhpd of the source with itself
;;   2-4: offsettable memory -> SSE / x87 / general register; emitted as "#"
;;        and left for a splitter.
(define_insn "*avx_storehpd"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
          (parallel [(const_int 1)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   vmovhpd\t{%1, %0|%0, %1}
   vunpckhpd\t{%1, %1, %0|%0, %1, %1}
   #
   #
   #"
  [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4856
;; SSE2: extract element 1 (the high double) of a V2DF.
;;   0: register -> memory                 movhpd store
;;   1: source tied to the destination     unpckhpd of the register with itself
;;   2-4: offsettable memory -> SSE / x87 / general register; emitted as "#"
;;        and left for a splitter.
(define_insn "sse2_storehpd"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhpd\t{%1, %0|%0, %1}
   unpckhpd\t%0, %0
   #
   #
   #"
  [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
   (set_attr "prefix_data16" "1,*,*,*,*")
   (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4872
;; Post-reload split: loading element 1 of a V2DF that lives in memory into
;; a register becomes a plain DFmode load from byte offset 8.
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
        (vec_select:DF
          (match_operand:V2DF 1 "memory_operand" "")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  "operands[1] = adjust_address (operands[1], DFmode, 8);")
4883
4884 ;; Avoid combining registers from different units in a single alternative,
4885 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 (the low double) of a V2DF.
;;   0: register -> memory   (v)movlpd store
;;   1-4: register -> SSE register, or memory -> SSE / x87 / general
;;        register; emitted as "#" and left for a splitter.
(define_insn "sse2_storelpd"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   %vmovlpd\t{%1, %0|%0, %1}
   #
   #
   #
   #"
  [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
   (set_attr "prefix_data16" "1,*,*,*,*")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V1DF,DF,DF,DF,DF")])
4902
;; Post-reload split: extracting element 0 of a V2DF into a register is a
;; DFmode move from the DFmode view of the source (same hard register
;; number for a register source, gen_lowpart otherwise).
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE2 && reload_completed"
  [(const_int 0)]
{
  rtx src = operands[1];
  rtx low_df = (REG_P (src)
                ? gen_rtx_REG (DFmode, REGNO (src))
                : gen_lowpart (DFmode, src));

  emit_move_insn (operands[0], low_df);
  DONE;
})
4919
;; Expander for "replace the high double": result is element 0 of operand 1
;; followed by the DF operand 2.  Operands are first legitimized through
;; ix86_fixup_binary_operands.
(define_expand "sse2_loadhpd_exp"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
        (vec_concat:V2DF
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0)]))
          (match_operand:DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
})
4929
4930 ;; Avoid combining registers from different units in a single alternative,
4931 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX "replace the high double": { operand 1 element 0, operand 2 }.
;;   0: operand 2 in memory                -> vmovhpd load
;;   1: operand 2 in an SSE register       -> vunpcklpd
;;   2-4: offsettable memory destination tied to operand 1, operand 2 in an
;;        SSE / x87 / general register; emitted as "#" for a splitter.
(define_insn "*avx_loadhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
        (vec_concat:V2DF
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
            (parallel [(const_int 0)]))
          (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
  "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   vmovhpd\t{%2, %1, %0|%0, %1, %2}
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   #
   #
   #"
  [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4949
;; SSE2 "replace the high double": { operand 1 element 0, operand 2 }.
;;   0: operand 2 in memory                -> movhpd load
;;   1: operand 2 in an SSE register       -> unpcklpd
;;   2-4: offsettable memory destination tied to operand 1, operand 2 in an
;;        SSE / x87 / general register; emitted as "#" for a splitter.
;;
;; A former alternative (destination x, operand 1 in x, operand 2 tied to
;; the destination) emitted "shufpd $1, %1, %0".  With immediate 1 shufpd
;; produces { dest[1], src[0] }, i.e. { <old high of %0>, op1[0] }, which is
;; not { op1[0], op2 }.  Two-operand shufpd always takes the low result
;; element from the destination and the high one from the source, so no
;; immediate can express this case; the alternative is dropped and the
;; register allocator falls back on the remaining ones.
(define_insn "sse2_loadhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
        (vec_concat:V2DF
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,0,0")
            (parallel [(const_int 0)]))
          (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhpd\t{%2, %0|%0, %2}
   unpcklpd\t{%2, %0|%0, %2}
   #
   #
   #"
  [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
   (set_attr "prefix_data16" "1,*,*,*,*")
   (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4969
;; Post-reload split of "replace the high double" when the vector lives in
;; memory: only the high element changes, so store the DF register at byte
;; offset 8 of the destination.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_concat:V2DF
          (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
          (match_operand:DF 1 "register_operand" "")))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  "operands[0] = adjust_address (operands[0], DFmode, 8);")
4980
;; Expander for "replace the low double": result is the DF operand 2
;; followed by element 1 of operand 1.  Operands are first legitimized
;; through ix86_fixup_binary_operands.
(define_expand "sse2_loadlpd_exp"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
        (vec_concat:V2DF
          (match_operand:DF 2 "nonimmediate_operand" "")
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "")
            (parallel [(const_int 1)]))))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
})
4990
4991 ;; Avoid combining registers from different units in a single alternative,
4992 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX "replace the low double": { operand 2, operand 1 element 1 }.
;;   0: operand 1 matches constraint C, operand 2 in memory -> vmovsd load
;;   1: operand 2 in memory, operand 1 in a register        -> vmovlpd
;;   2: both in registers                                   -> vmovsd
;;   3: operand 1 in offsettable memory                     -> vmovhpd from %H1
;;   4-6: memory destination tied to operand 1, operand 2 in an SSE / x87 /
;;        general register; emitted as "#" for a splitter.
(define_insn "*avx_loadlpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
        (vec_concat:V2DF
          (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
          (vec_select:DF
            (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
            (parallel [(const_int 1)]))))]
  "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   vmovsd\t{%2, %0|%0, %2}
   vmovlpd\t{%2, %1, %0|%0, %1, %2}
   vmovsd\t{%2, %1, %0|%0, %1, %2}
   vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
   #
   #
   #"
  [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
5012
;; SSE2 "replace the low double": { operand 2, operand 1 element 1 }.
;;   0: operand 1 matches constraint C, operand 2 in memory -> movsd load
;;   1: operand 2 in memory, operand 1 tied to destination  -> movlpd
;;   2: operand 2 in a register, operand 1 tied             -> movsd
;;   3: operand 2 tied to destination, operand 1 in x       -> shufpd
;;   4: operand 2 tied, operand 1 in offsettable memory     -> movhpd from %H1
;;   5-7: memory destination tied to operand 1, operand 2 in an SSE / x87 /
;;        general register; emitted as "#" for a splitter.
;;
;; In alternative 3 the destination register already holds operand 2 in its
;; low element, and the high element has to come from operand 1.  shufpd
;; with immediate 2 yields { dest[0], src[1] }, so the source must be %1.
;; The previous template named %2, which is the same register as %0 in this
;; alternative, leaving the high element unrelated to operand 1.
(define_insn "sse2_loadlpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
        (vec_concat:V2DF
          (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
          (vec_select:DF
            (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
            (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movsd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhpd\t{%H1, %0|%0, %H1}
   #
   #
   #"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
   (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
   (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
5034
;; Post-reload split of "replace the low double" when the vector lives in
;; memory: the result is { operand 1, <old element 1 of the memory> }, so
;; only element 0 changes and the DF register must be stored at byte
;; offset 0.  (The previous offset of 8 overwrote the high element, which
;; the pattern says is preserved, and left the low element stale.)
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_concat:V2DF
          (match_operand:DF 1 "register_operand" "")
          (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[0] = adjust_address (operands[0], DFmode, 0);
})
5045
5046 ;; Not sure these two are ever used, but it doesn't hurt to have
5047 ;; them. -aoliva
;; SSE-without-SSE2 extraction of element 1 of a V2DF.
;;   0: register -> memory            movhps store
;;   1: register -> register          movhlps
;;   2: offsettable memory -> register  movlps load from %H1
(define_insn "*vec_extractv2df_1_sse"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 1)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
5061
;; SSE-without-SSE2 extraction of element 0 of a V2DF.
;;   0: register -> memory     movlps store
;;   1: register -> register   movaps
;;   2: memory -> register     movlps load
(define_insn "*vec_extractv2df_0_sse"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
          (parallel [(const_int 0)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movlps\t{%1, %0|%0, %1}
   movaps\t{%1, %0|%0, %1}
   movlps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
5075
;; AVX merge of two V2DF values: with mask 1, element 0 comes from operand 2
;; and element 1 from operand 1.
;;   0: all registers                          -> vmovsd
;;   1: operand 2 in memory                    -> vmovlpd load
;;   2: memory destination tied to operand 1   -> vmovlpd store of operand 2
;;   3: operand 1 in offsettable memory        -> vmovhps load from %H1
;;   4: memory destination tied to operand 2   -> vmovhps store to %H0
(define_insn "*avx_movsd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
        (vec_merge:V2DF
          (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
          (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
          (const_int 1)))]
  "TARGET_AVX"
  "@
   vmovsd\t{%2, %1, %0|%0, %1, %2}
   vmovlpd\t{%2, %1, %0|%0, %1, %2}
   vmovlpd\t{%2, %0|%0, %2}
   vmovhps\t{%H1, %2, %0|%0, %2, %H1}
   vmovhps\t{%1, %H0|%H0, %1}"
  [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
5092
;; SSE2 merge of two V2DF values: with mask 1, element 0 comes from
;; operand 2 and element 1 from operand 1.
;;   0: operand 1 tied, operand 2 in a register     -> movsd
;;   1: operand 1 tied, operand 2 in memory         -> movlpd load
;;   2: memory destination tied to operand 1        -> movlpd store
;;   3: operand 2 tied, operand 1 in a register     -> shufpd
;;   4: operand 2 tied, operand 1 in offsettable mem -> movhps load from %H1
;;   5: memory destination tied to operand 2        -> movhps store to %H0
;;
;; In alternative 3 the destination already holds operand 2, so its low
;; element is in place and the high element must be taken from operand 1.
;; shufpd with immediate 2 yields { dest[0], src[1] }, hence the source has
;; to be %1.  The previous template used %2, which is the destination
;; register itself in this alternative and made the instruction a no-op.
(define_insn "sse2_movsd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
        (vec_merge:V2DF
          (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
          (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhps\t{%H1, %0|%0, %H1}
   movhps\t{%1, %H0|%H0, %1}"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
   (set_attr "prefix_data16" "*,1,1,*,*,*")
   (set_attr "length_immediate" "*,*,*,1,*,*")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
5111
;; SSE3 broadcast of a DF scalar (register or memory) into both elements of
;; a V2DF register, via (v)movddup.
(define_insn "*vec_dupv2df_sse3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_duplicate:V2DF
          (match_operand:DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE3"
  "%vmovddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DF")])
5121
;; SSE2 broadcast of a DF scalar into both elements of a V2DF register.
;; The scalar is tied to the destination register and the register is
;; unpacked with itself.
(define_insn "vec_dupv2df"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_duplicate:V2DF
          (match_operand:DF 1 "register_operand" "0")))]
  "TARGET_SSE2"
  "unpcklpd\t%0, %0"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "V2DF")])
5130
;; SSE3: a V2DF built by concatenating a DF scalar with itself is a
;; broadcast, emitted as (v)movddup.
(define_insn "*vec_concatv2df_sse3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_concat:V2DF
          (match_operand:DF 1 "nonimmediate_operand" "xm")
          (match_dup 1)))]
  "TARGET_SSE3"
  "%vmovddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DF")])
5141
;; AVX construction of a V2DF from two DF scalars { operand 1, operand 2 }.
;;   0: both in registers                               -> vunpcklpd
;;   1: operand 2 in memory                             -> vmovhpd load
;;   2: operand 1 in memory, operand 2 matches
;;      constraint C                                    -> vmovsd load
(define_insn "*vec_concatv2df_avx"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
        (vec_concat:V2DF
          (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
          (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
  "TARGET_AVX"
  "@
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   vmovhpd\t{%2, %1, %0|%0, %1, %2}
   vmovsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "DF,V1DF,DF")])
5155
;; Non-AVX construction of a V2DF from two DF scalars
;; { operand 1, operand 2 }.  The first three alternatives use the Y2
;; register constraint and emit unpcklpd / movhpd / movsd; the last two use
;; plain x registers and emit movlhps / movhps.
(define_insn "*vec_concatv2df"
  [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
        (vec_concat:V2DF
          (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
          (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
  "TARGET_SSE"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   movhpd\t{%2, %0|%0, %2}
   movsd\t{%1, %0|%0, %1}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
   (set_attr "prefix_data16" "*,1,*,*,*")
   (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
5171
5172 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5173 ;;
5174 ;; Parallel integral arithmetic
5175 ;;
5176 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5177
;; Integer vector negation, expressed as (0 - operand 1).  Operand 2 is
;; created here as a register holding the all-zero vector of the mode.
(define_expand "neg<mode>2"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (minus:SSEMODEI
          (match_dup 2)
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
})
5185
;; Expander for integer vector add/subtract over all SSEMODEI modes.
;; Operands are legitimized in place by ix86_fixup_binary_operands_no_copy.
(define_expand "<plusminus_insn><mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (plusminus:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
5193
;; AVX three-operand integer vector add/subtract (vpadd*/vpsub*).
;; The <comm> prefix on operand 1 comes from the plusminus iterator.
(define_insn "*avx_<plusminus_insn><mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (plusminus:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
5204
;; SSE2 two-operand integer vector add/subtract (padd*/psub*); operand 1 is
;; tied to the destination register.
(define_insn "*<plusminus_insn><mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (plusminus:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
5215
;; Expander for the sat_plusminus (saturating add/subtract) codes on the
;; byte and halfword vector modes (SSEMODE12).  Operands are legitimized in
;; place by ix86_fixup_binary_operands_no_copy.
(define_expand "sse2_<plusminus_insn><mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "")
        (sat_plusminus:SSEMODE12
          (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
5223
;; AVX three-operand form of the sat_plusminus operations on SSEMODE12
;; vectors.
(define_insn "*avx_<plusminus_insn><mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (sat_plusminus:SSEMODE12
          (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
5234
;; SSE2 two-operand form of the sat_plusminus operations on SSEMODE12
;; vectors; operand 1 is tied to the destination register.
(define_insn "*sse2_<plusminus_insn><mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (sat_plusminus:SSEMODE12
          (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
5245
;; V16QI multiply.  There is no direct instruction emitted here: while
;; pseudos can still be created the insn is split into byte unpacks, two
;; V8HI multiplies and a chain of byte unpacks that gathers the low byte of
;; every product back into one vector.
(define_insn_and_split "mulv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
                    (match_operand:V16QI 2 "register_operand" "")))]
  "TARGET_SSE2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx tmp[12];
  int k;

  /* Twelve fresh V16QI temporaries for the intermediate results.  */
  for (k = 0; k < 12; ++k)
    tmp[k] = gen_reg_rtx (V16QImode);

  /* Unpack each input with itself so that every 16-bit word has a source
     byte in its low byte.  The high byte of each word is a don't-care;
     duplicating the low byte is simply the cheapest thing to put there.  */
  emit_insn (gen_sse2_punpckhbw (tmp[0], operands[1], operands[1]));
  emit_insn (gen_sse2_punpckhbw (tmp[1], operands[2], operands[2]));
  emit_insn (gen_sse2_punpcklbw (tmp[2], operands[1], operands[1]));
  emit_insn (gen_sse2_punpcklbw (tmp[3], operands[2], operands[2]));

  /* Word multiplies.  The trailing comments picture each result: a dot is
     a don't-care byte, letters are result bytes with A the most
     significant.  */
  emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, tmp[4]), /* .A.B.C.D.E.F.G.H */
                           gen_lowpart (V8HImode, tmp[0]),
                           gen_lowpart (V8HImode, tmp[1])));
  emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, tmp[5]), /* .I.J.K.L.M.N.O.P */
                           gen_lowpart (V8HImode, tmp[2]),
                           gen_lowpart (V8HImode, tmp[3])));

  /* Gather the wanted bytes and interleave them back into order.  */
  emit_insn (gen_sse2_punpckhbw (tmp[6], tmp[5], tmp[4]));   /* ..AI..BJ..CK..DL */
  emit_insn (gen_sse2_punpcklbw (tmp[7], tmp[5], tmp[4]));   /* ..EM..FN..GO..HP */
  emit_insn (gen_sse2_punpckhbw (tmp[8], tmp[7], tmp[6]));   /* ....AEIM....BFJN */
  emit_insn (gen_sse2_punpcklbw (tmp[9], tmp[7], tmp[6]));   /* ....CGKO....DHLP */
  emit_insn (gen_sse2_punpckhbw (tmp[10], tmp[9], tmp[8]));  /* ........ACEGIKMO */
  emit_insn (gen_sse2_punpcklbw (tmp[11], tmp[9], tmp[8]));  /* ........BDFHJLNP */

  emit_insn (gen_sse2_punpcklbw (operands[0], tmp[11], tmp[10])); /* ABCDEFGHIJKLMNOP */
  DONE;
})
5292
;; Expander for the V8HI multiply.  Operands are legitimized in place by
;; ix86_fixup_binary_operands_no_copy.
(define_expand "mulv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (mult:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "")
          (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);
})
5299
;; AVX form of the V8HI multiply: three-operand vpmullw with a destination
;; separate from both sources.  "%" marks operands 1 and 2 as commutative;
;; operand 2 may be a register or memory ("xm").
5300 (define_insn "*avx_mulv8hi3"
5301 [(set (match_operand:V8HI 0 "register_operand" "=x")
5302 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5303 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5304 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5305 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
5306 [(set_attr "type" "sseimul")
5307 (set_attr "prefix" "vex")
5308 (set_attr "mode" "TI")])
5309
;; SSE2 form of the V8HI multiply: two-operand pmullw, so operand 1 is
;; tied to the destination (constraint "0"); operand 2 may be a register
;; or memory.
5310 (define_insn "*mulv8hi3"
5311 [(set (match_operand:V8HI 0 "register_operand" "=x")
5312 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5313 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5314 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5315 "pmullw\t{%2, %0|%0, %2}"
5316 [(set_attr "type" "sseimul")
5317 (set_attr "prefix_data16" "1")
5318 (set_attr "mode" "TI")])
5319
;; Expander for the signed high-part V8HI multiply.  The pattern sign-extends
;; both inputs to V8SI, multiplies, shifts each product right by 16 and
;; truncates back to V8HI, i.e. it keeps the upper 16 bits of each product.
5320 (define_expand "smulv8hi3_highpart"
5321 [(set (match_operand:V8HI 0 "register_operand" "")
5322 (truncate:V8HI
5323 (lshiftrt:V8SI
5324 (mult:V8SI
5325 (sign_extend:V8SI
5326 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5327 (sign_extend:V8SI
5328 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5329 (const_int 16))))]
5330 "TARGET_SSE2"
5331 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
5332
;; AVX form of the signed high-part V8HI multiply: three-operand vpmulhw.
;; The pattern keeps the upper 16 bits of each sign-extended 16x16 product.
;; Named with the "smul" stem so that it lines up with the unsigned
;; "*avx_umulv8hi3_highpart" and the SSE2 "*smulv8hi3_highpart" patterns.
5333 (define_insn "*avx_smulv8hi3_highpart"
5334 [(set (match_operand:V8HI 0 "register_operand" "=x")
5335 (truncate:V8HI
5336 (lshiftrt:V8SI
5337 (mult:V8SI
5338 (sign_extend:V8SI
5339 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5340 (sign_extend:V8SI
5341 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5342 (const_int 16))))]
5343 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5344 "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
5345 [(set_attr "type" "sseimul")
5346 (set_attr "prefix" "vex")
5347 (set_attr "mode" "TI")])
5348
;; SSE2 form of the signed high-part V8HI multiply: two-operand pmulhw with
;; operand 1 tied to the destination.
5349 (define_insn "*smulv8hi3_highpart"
5350 [(set (match_operand:V8HI 0 "register_operand" "=x")
5351 (truncate:V8HI
5352 (lshiftrt:V8SI
5353 (mult:V8SI
5354 (sign_extend:V8SI
5355 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5356 (sign_extend:V8SI
5357 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5358 (const_int 16))))]
5359 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5360 "pmulhw\t{%2, %0|%0, %2}"
5361 [(set_attr "type" "sseimul")
5362 (set_attr "prefix_data16" "1")
5363 (set_attr "mode" "TI")])
5364
;; Expander for the unsigned high-part V8HI multiply.  Same shape as the
;; signed variant, but the inputs are zero-extended to V8SI before the
;; multiply; the upper 16 bits of each product are kept.
5365 (define_expand "umulv8hi3_highpart"
5366 [(set (match_operand:V8HI 0 "register_operand" "")
5367 (truncate:V8HI
5368 (lshiftrt:V8SI
5369 (mult:V8SI
5370 (zero_extend:V8SI
5371 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5372 (zero_extend:V8SI
5373 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5374 (const_int 16))))]
5375 "TARGET_SSE2"
5376 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
5377
;; AVX form of the unsigned high-part V8HI multiply: three-operand vpmulhuw.
5378 (define_insn "*avx_umulv8hi3_highpart"
5379 [(set (match_operand:V8HI 0 "register_operand" "=x")
5380 (truncate:V8HI
5381 (lshiftrt:V8SI
5382 (mult:V8SI
5383 (zero_extend:V8SI
5384 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5385 (zero_extend:V8SI
5386 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5387 (const_int 16))))]
5388 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5389 "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
5390 [(set_attr "type" "sseimul")
5391 (set_attr "prefix" "vex")
5392 (set_attr "mode" "TI")])
5393
;; SSE2 form of the unsigned high-part V8HI multiply: two-operand pmulhuw
;; with operand 1 tied to the destination.
5394 (define_insn "*umulv8hi3_highpart"
5395 [(set (match_operand:V8HI 0 "register_operand" "=x")
5396 (truncate:V8HI
5397 (lshiftrt:V8SI
5398 (mult:V8SI
5399 (zero_extend:V8SI
5400 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5401 (zero_extend:V8SI
5402 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5403 (const_int 16))))]
5404 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5405 "pmulhuw\t{%2, %0|%0, %2}"
5406 [(set_attr "type" "sseimul")
5407 (set_attr "prefix_data16" "1")
5408 (set_attr "mode" "TI")])
5409
;; Expander for the unsigned widening multiply V4SI x V4SI -> V2DI.  Only
;; elements 0 and 2 of each V4SI input are selected, zero-extended to DI
;; and multiplied.  Operands are fixed up as a V4SI MULT first.
5410 (define_expand "sse2_umulv2siv2di3"
5411 [(set (match_operand:V2DI 0 "register_operand" "")
5412 (mult:V2DI
5413 (zero_extend:V2DI
5414 (vec_select:V2SI
5415 (match_operand:V4SI 1 "nonimmediate_operand" "")
5416 (parallel [(const_int 0) (const_int 2)])))
5417 (zero_extend:V2DI
5418 (vec_select:V2SI
5419 (match_operand:V4SI 2 "nonimmediate_operand" "")
5420 (parallel [(const_int 0) (const_int 2)])))))]
5421 "TARGET_SSE2"
5422 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
5423
;; AVX form of the unsigned even-element widening multiply: three-operand
;; vpmuludq on elements 0 and 2 of each V4SI source.
5424 (define_insn "*avx_umulv2siv2di3"
5425 [(set (match_operand:V2DI 0 "register_operand" "=x")
5426 (mult:V2DI
5427 (zero_extend:V2DI
5428 (vec_select:V2SI
5429 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5430 (parallel [(const_int 0) (const_int 2)])))
5431 (zero_extend:V2DI
5432 (vec_select:V2SI
5433 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5434 (parallel [(const_int 0) (const_int 2)])))))]
5435 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5436 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5437 [(set_attr "type" "sseimul")
5438 (set_attr "prefix" "vex")
5439 (set_attr "mode" "TI")])
5440
;; SSE2 form of the unsigned even-element widening multiply: two-operand
;; pmuludq with operand 1 tied to the destination.
5441 (define_insn "*sse2_umulv2siv2di3"
5442 [(set (match_operand:V2DI 0 "register_operand" "=x")
5443 (mult:V2DI
5444 (zero_extend:V2DI
5445 (vec_select:V2SI
5446 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5447 (parallel [(const_int 0) (const_int 2)])))
5448 (zero_extend:V2DI
5449 (vec_select:V2SI
5450 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5451 (parallel [(const_int 0) (const_int 2)])))))]
5452 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5453 "pmuludq\t{%2, %0|%0, %2}"
5454 [(set_attr "type" "sseimul")
5455 (set_attr "prefix_data16" "1")
5456 (set_attr "mode" "TI")])
5457
;; Expander for the signed widening multiply V4SI x V4SI -> V2DI, enabled
;; for SSE4.1.  Elements 0 and 2 of each input are sign-extended to DI and
;; multiplied.
5458 (define_expand "sse4_1_mulv2siv2di3"
5459 [(set (match_operand:V2DI 0 "register_operand" "")
5460 (mult:V2DI
5461 (sign_extend:V2DI
5462 (vec_select:V2SI
5463 (match_operand:V4SI 1 "nonimmediate_operand" "")
5464 (parallel [(const_int 0) (const_int 2)])))
5465 (sign_extend:V2DI
5466 (vec_select:V2SI
5467 (match_operand:V4SI 2 "nonimmediate_operand" "")
5468 (parallel [(const_int 0) (const_int 2)])))))]
5469 "TARGET_SSE4_1"
5470 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
5471
;; AVX form of the signed even-element widening multiply: three-operand
;; vpmuldq on elements 0 and 2 of each V4SI source.
5472 (define_insn "*avx_mulv2siv2di3"
5473 [(set (match_operand:V2DI 0 "register_operand" "=x")
5474 (mult:V2DI
5475 (sign_extend:V2DI
5476 (vec_select:V2SI
5477 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5478 (parallel [(const_int 0) (const_int 2)])))
5479 (sign_extend:V2DI
5480 (vec_select:V2SI
5481 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5482 (parallel [(const_int 0) (const_int 2)])))))]
5483 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5484 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5485 [(set_attr "type" "sseimul")
5486 (set_attr "prefix_extra" "1")
5487 (set_attr "prefix" "vex")
5488 (set_attr "mode" "TI")])
5489
;; SSE4.1 form of the signed even-element widening multiply: two-operand
;; pmuldq with operand 1 tied to the destination.
5490 (define_insn "*sse4_1_mulv2siv2di3"
5491 [(set (match_operand:V2DI 0 "register_operand" "=x")
5492 (mult:V2DI
5493 (sign_extend:V2DI
5494 (vec_select:V2SI
5495 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5496 (parallel [(const_int 0) (const_int 2)])))
5497 (sign_extend:V2DI
5498 (vec_select:V2SI
5499 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5500 (parallel [(const_int 0) (const_int 2)])))))]
5501 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5502 "pmuldq\t{%2, %0|%0, %2}"
5503 [(set_attr "type" "sseimul")
5504 (set_attr "prefix_extra" "1")
5505 (set_attr "mode" "TI")])
5506
;; Expander for the V8HI multiply-add producing V4SI.  The pattern is the
;; sum of two widening products: the sign-extended even elements
;; (0, 2, 4, 6) of both inputs multiplied together, plus the sign-extended
;; odd elements (1, 3, 5, 7) multiplied together.
5507 (define_expand "sse2_pmaddwd"
5508 [(set (match_operand:V4SI 0 "register_operand" "")
5509 (plus:V4SI
5510 (mult:V4SI
5511 (sign_extend:V4SI
5512 (vec_select:V4HI
5513 (match_operand:V8HI 1 "nonimmediate_operand" "")
5514 (parallel [(const_int 0)
5515 (const_int 2)
5516 (const_int 4)
5517 (const_int 6)])))
5518 (sign_extend:V4SI
5519 (vec_select:V4HI
5520 (match_operand:V8HI 2 "nonimmediate_operand" "")
5521 (parallel [(const_int 0)
5522 (const_int 2)
5523 (const_int 4)
5524 (const_int 6)]))))
5525 (mult:V4SI
5526 (sign_extend:V4SI
5527 (vec_select:V4HI (match_dup 1)
5528 (parallel [(const_int 1)
5529 (const_int 3)
5530 (const_int 5)
5531 (const_int 7)])))
5532 (sign_extend:V4SI
5533 (vec_select:V4HI (match_dup 2)
5534 (parallel [(const_int 1)
5535 (const_int 3)
5536 (const_int 5)
5537 (const_int 7)]))))))]
5538 "TARGET_SSE2"
5539 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
5540
;; AVX form of the V8HI multiply-add: three-operand vpmaddwd.  The RTL is
;; even-element products plus odd-element products, each sign-extended to
;; SI; match_dup ties the odd-element selects to the same two operands.
5541 (define_insn "*avx_pmaddwd"
5542 [(set (match_operand:V4SI 0 "register_operand" "=x")
5543 (plus:V4SI
5544 (mult:V4SI
5545 (sign_extend:V4SI
5546 (vec_select:V4HI
5547 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5548 (parallel [(const_int 0)
5549 (const_int 2)
5550 (const_int 4)
5551 (const_int 6)])))
5552 (sign_extend:V4SI
5553 (vec_select:V4HI
5554 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5555 (parallel [(const_int 0)
5556 (const_int 2)
5557 (const_int 4)
5558 (const_int 6)]))))
5559 (mult:V4SI
5560 (sign_extend:V4SI
5561 (vec_select:V4HI (match_dup 1)
5562 (parallel [(const_int 1)
5563 (const_int 3)
5564 (const_int 5)
5565 (const_int 7)])))
5566 (sign_extend:V4SI
5567 (vec_select:V4HI (match_dup 2)
5568 (parallel [(const_int 1)
5569 (const_int 3)
5570 (const_int 5)
5571 (const_int 7)]))))))]
5572 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5573 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5574 [(set_attr "type" "sseiadd")
5575 (set_attr "prefix" "vex")
5576 (set_attr "mode" "TI")])
5577
;; SSE2 form of the V8HI multiply-add: two-operand pmaddwd with operand 1
;; tied to the destination.  Unlike the AVX pattern this one also sets the
;; "atom_unit" attribute to "simul".
5578 (define_insn "*sse2_pmaddwd"
5579 [(set (match_operand:V4SI 0 "register_operand" "=x")
5580 (plus:V4SI
5581 (mult:V4SI
5582 (sign_extend:V4SI
5583 (vec_select:V4HI
5584 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5585 (parallel [(const_int 0)
5586 (const_int 2)
5587 (const_int 4)
5588 (const_int 6)])))
5589 (sign_extend:V4SI
5590 (vec_select:V4HI
5591 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5592 (parallel [(const_int 0)
5593 (const_int 2)
5594 (const_int 4)
5595 (const_int 6)]))))
5596 (mult:V4SI
5597 (sign_extend:V4SI
5598 (vec_select:V4HI (match_dup 1)
5599 (parallel [(const_int 1)
5600 (const_int 3)
5601 (const_int 5)
5602 (const_int 7)])))
5603 (sign_extend:V4SI
5604 (vec_select:V4HI (match_dup 2)
5605 (parallel [(const_int 1)
5606 (const_int 3)
5607 (const_int 5)
5608 (const_int 7)]))))))]
5609 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5610 "pmaddwd\t{%2, %0|%0, %2}"
5611 [(set_attr "type" "sseiadd")
5612 (set_attr "atom_unit" "simul")
5613 (set_attr "prefix_data16" "1")
5614 (set_attr "mode" "TI")])
5615
;; Expander for the element-wise V4SI multiply, enabled for SSE2.  The
;; operand fixup is only performed when SSE4.1 or XOP is available;
;; otherwise the pattern is emitted with the operands untouched (the
;; plain-SSE2 case is handled by the "*sse2_mulv4si3" splitter).
5616 (define_expand "mulv4si3"
5617 [(set (match_operand:V4SI 0 "register_operand" "")
5618 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5619 (match_operand:V4SI 2 "register_operand" "")))]
5620 "TARGET_SSE2"
5621 {
5622 if (TARGET_SSE4_1 || TARGET_XOP)
5623 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
5624 })
5625
;; AVX form of the V4SI multiply: three-operand vpmulld.
5626 (define_insn "*avx_mulv4si3"
5627 [(set (match_operand:V4SI 0 "register_operand" "=x")
5628 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5629 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5630 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5631 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5632 [(set_attr "type" "sseimul")
5633 (set_attr "prefix_extra" "1")
5634 (set_attr "prefix" "vex")
5635 (set_attr "mode" "TI")])
5636
;; SSE4.1 form of the V4SI multiply: two-operand pmulld with operand 1 tied
;; to the destination.
5637 (define_insn "*sse4_1_mulv4si3"
5638 [(set (match_operand:V4SI 0 "register_operand" "=x")
5639 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5640 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5641 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5642 "pmulld\t{%2, %0|%0, %2}"
5643 [(set_attr "type" "sseimul")
5644 (set_attr "prefix_extra" "1")
5645 (set_attr "mode" "TI")])
5646
;; V4SI multiply for plain SSE2 (neither SSE4.1 nor XOP).  The insn has no
;; real template ("#") and is always split ("&& 1"); it only matches while
;; new pseudos can still be created.  The split builds the result from two
;; widening even-element multiplies (elements 0/2 and, after shifting the
;; inputs down by one element, elements 1/3) whose low halves are then
;; shuffled and interleaved back into a V4SI.
5647 (define_insn_and_split "*sse2_mulv4si3"
5648 [(set (match_operand:V4SI 0 "register_operand" "")
5649 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5650 (match_operand:V4SI 2 "register_operand" "")))]
5651 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_XOP
5652 && can_create_pseudo_p ()"
5653 "#"
5654 "&& 1"
5655 [(const_int 0)]
5656 {
5657 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5658 rtx op0, op1, op2;
5659
5660 op0 = operands[0];
5661 op1 = operands[1];
5662 op2 = operands[2];
5663 t1 = gen_reg_rtx (V4SImode);
5664 t2 = gen_reg_rtx (V4SImode);
5665 t3 = gen_reg_rtx (V4SImode);
5666 t4 = gen_reg_rtx (V4SImode);
5667 t5 = gen_reg_rtx (V4SImode);
5668 t6 = gen_reg_rtx (V4SImode);
5669 thirtytwo = GEN_INT (32);
5670
5671 /* Multiply elements 2 and 0. */
5672 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5673 op1, op2));
5674
5675 /* Shift both input vectors down one element, so that elements 3
5676 and 1 are now in the slots for elements 2 and 0. For K8, at
5677 least, this is faster than using a shuffle. */
5678 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5679 gen_lowpart (TImode, op1),
5680 thirtytwo));
5681 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5682 gen_lowpart (TImode, op2),
5683 thirtytwo));
5684 /* Multiply elements 3 and 1. */
5685 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5686 t2, t3));
5687
5688 /* Move the results in element 2 down to element 1; we don't care
5689 what goes in elements 2 and 3. */
5690 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5691 const0_rtx, const0_rtx));
5692 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5693 const0_rtx, const0_rtx));
5694
5695 /* Merge the parts back together. */
5696 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
5697 DONE;
5698 })
5699
;; V2DI multiply, synthesized because the pattern has no direct template
;; ("#", always split while pseudos can be created).  With XOP the split
;; uses pmacsdd / phadddq / pmacsdql; otherwise it builds each 64-bit
;; product from three 32x32->64 unsigned multiplies:
;;   lo(a)*lo(b) + ((lo(a)*hi(b) + lo(b)*hi(a)) << 32).
5700 (define_insn_and_split "mulv2di3"
5701 [(set (match_operand:V2DI 0 "register_operand" "")
5702 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5703 (match_operand:V2DI 2 "register_operand" "")))]
5704 "TARGET_SSE2
5705 && can_create_pseudo_p ()"
5706 "#"
5707 "&& 1"
5708 [(const_int 0)]
5709 {
5710 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5711 rtx op0, op1, op2;
5712
5713 if (TARGET_XOP)
5714 {
5715 /* op1: A,B,C,D, op2: E,F,G,H */
5716 op0 = operands[0];
5717 op1 = gen_lowpart (V4SImode, operands[1]);
5718 op2 = gen_lowpart (V4SImode, operands[2]);
5719 t1 = gen_reg_rtx (V4SImode);
5720 t2 = gen_reg_rtx (V4SImode);
5721 t3 = gen_reg_rtx (V4SImode);
5722 t4 = gen_reg_rtx (V2DImode);
5723 t5 = gen_reg_rtx (V2DImode);
5724
5725 /* t1: B,A,D,C */
5726 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5727 GEN_INT (1),
5728 GEN_INT (0),
5729 GEN_INT (3),
5730 GEN_INT (2)));
5731
5732 /* t2: 0 */
5733 emit_move_insn (t2, CONST0_RTX (V4SImode));
5734
5735 /* t3: (B*E),(A*F),(D*G),(C*H) */
5736 emit_insn (gen_xop_pmacsdd (t3, t1, op2, t2));
5737
5738 /* t4: (B*E)+(A*F), (D*G)+(C*H) */
5739 emit_insn (gen_xop_phadddq (t4, t3));
5740
5741 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5742 emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32)));
5743
5744 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5745 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t5));
5746 DONE;
5747 }
5748
5749 op0 = operands[0];
5750 op1 = operands[1];
5751 op2 = operands[2];
5752 t1 = gen_reg_rtx (V2DImode);
5753 t2 = gen_reg_rtx (V2DImode);
5754 t3 = gen_reg_rtx (V2DImode);
5755 t4 = gen_reg_rtx (V2DImode);
5756 t5 = gen_reg_rtx (V2DImode);
5757 t6 = gen_reg_rtx (V2DImode);
5758 thirtytwo = GEN_INT (32);
5759
5760 /* Multiply low parts. */
5761 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5762 gen_lowpart (V4SImode, op2)));
5763
5764 /* Shift input vectors right 32 bits so that the high part of each
     element lands in the low part, where the widening multiply reads it. */
5765 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5766 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5767
5768 /* Multiply high parts by low parts. */
5769 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5770 gen_lowpart (V4SImode, t3)));
5771 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5772 gen_lowpart (V4SImode, t2)));
5773
5774 /* Shift them back. */
5775 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5776 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5777
5778 /* Add the three parts together. */
5779 emit_insn (gen_addv2di3 (t6, t1, t4));
5780 emit_insn (gen_addv2di3 (op0, t6, t5));
5781 DONE;
5782 })
5783
;; Signed widening multiply V8HI x V8HI -> V4SI, "hi" variant.  Computes
;; the low 16 bits (mulv8hi3) and the signed high 16 bits
;; (smulv8hi3_highpart) of every product, then combines them with
;; vec_interleave_highv8hi into the destination viewed as V8HI.
5784 (define_expand "vec_widen_smult_hi_v8hi"
5785 [(match_operand:V4SI 0 "register_operand" "")
5786 (match_operand:V8HI 1 "register_operand" "")
5787 (match_operand:V8HI 2 "register_operand" "")]
5788 "TARGET_SSE2"
5789 {
5790 rtx op1, op2, t1, t2, dest;
5791
5792 op1 = operands[1];
5793 op2 = operands[2];
5794 t1 = gen_reg_rtx (V8HImode);
5795 t2 = gen_reg_rtx (V8HImode);
5796 dest = gen_lowpart (V8HImode, operands[0]);
5797
5798 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5799 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5800 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
5801 DONE;
5802 })
5803
;; Signed widening multiply V8HI x V8HI -> V4SI, "lo" variant.  Same
;; low-part / signed-high-part products as the "hi" variant, combined with
;; vec_interleave_lowv8hi instead.
5804 (define_expand "vec_widen_smult_lo_v8hi"
5805 [(match_operand:V4SI 0 "register_operand" "")
5806 (match_operand:V8HI 1 "register_operand" "")
5807 (match_operand:V8HI 2 "register_operand" "")]
5808 "TARGET_SSE2"
5809 {
5810 rtx op1, op2, t1, t2, dest;
5811
5812 op1 = operands[1];
5813 op2 = operands[2];
5814 t1 = gen_reg_rtx (V8HImode);
5815 t2 = gen_reg_rtx (V8HImode);
5816 dest = gen_lowpart (V8HImode, operands[0]);
5817
5818 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5819 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5820 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
5821 DONE;
5822 })
5823
;; Unsigned widening multiply V8HI x V8HI -> V4SI, "hi" variant.  Uses the
;; low-part multiply plus the unsigned high-part multiply
;; (umulv8hi3_highpart), combined with vec_interleave_highv8hi.
5824 (define_expand "vec_widen_umult_hi_v8hi"
5825 [(match_operand:V4SI 0 "register_operand" "")
5826 (match_operand:V8HI 1 "register_operand" "")
5827 (match_operand:V8HI 2 "register_operand" "")]
5828 "TARGET_SSE2"
5829 {
5830 rtx op1, op2, t1, t2, dest;
5831
5832 op1 = operands[1];
5833 op2 = operands[2];
5834 t1 = gen_reg_rtx (V8HImode);
5835 t2 = gen_reg_rtx (V8HImode);
5836 dest = gen_lowpart (V8HImode, operands[0]);
5837
5838 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5839 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5840 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
5841 DONE;
5842 })
5843
;; Unsigned widening multiply V8HI x V8HI -> V4SI, "lo" variant.  Uses the
;; low-part multiply plus the unsigned high-part multiply, combined with
;; vec_interleave_lowv8hi.
5844 (define_expand "vec_widen_umult_lo_v8hi"
5845 [(match_operand:V4SI 0 "register_operand" "")
5846 (match_operand:V8HI 1 "register_operand" "")
5847 (match_operand:V8HI 2 "register_operand" "")]
5848 "TARGET_SSE2"
5849 {
5850 rtx op1, op2, t1, t2, dest;
5851
5852 op1 = operands[1];
5853 op2 = operands[2];
5854 t1 = gen_reg_rtx (V8HImode);
5855 t2 = gen_reg_rtx (V8HImode);
5856 dest = gen_lowpart (V8HImode, operands[0]);
5857
5858 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5859 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5860 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
5861 DONE;
5862 })
5863
;; Signed widening multiply V4SI x V4SI -> V2DI, "hi" variant; XOP only.
;; Both inputs are first permuted with pshufd using the element selector
;; (0, 2, 1, 3), then multiplied with xop_mulv2div2di3_high.
5864 (define_expand "vec_widen_smult_hi_v4si"
5865 [(match_operand:V2DI 0 "register_operand" "")
5866 (match_operand:V4SI 1 "register_operand" "")
5867 (match_operand:V4SI 2 "register_operand" "")]
5868 "TARGET_XOP"
5869 {
5870 rtx t1, t2;
5871
5872 t1 = gen_reg_rtx (V4SImode);
5873 t2 = gen_reg_rtx (V4SImode);
5874
5875 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5876 GEN_INT (0),
5877 GEN_INT (2),
5878 GEN_INT (1),
5879 GEN_INT (3)));
5880 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5881 GEN_INT (0),
5882 GEN_INT (2),
5883 GEN_INT (1),
5884 GEN_INT (3)));
5885 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
5886 DONE;
5887 })
5888
;; Signed widening multiply V4SI x V4SI -> V2DI, "lo" variant; XOP only.
;; Same (0, 2, 1, 3) pshufd permutation of both inputs as the "hi" variant,
;; followed by xop_mulv2div2di3_low.
5889 (define_expand "vec_widen_smult_lo_v4si"
5890 [(match_operand:V2DI 0 "register_operand" "")
5891 (match_operand:V4SI 1 "register_operand" "")
5892 (match_operand:V4SI 2 "register_operand" "")]
5893 "TARGET_XOP"
5894 {
5895 rtx t1, t2;
5896
5897 t1 = gen_reg_rtx (V4SImode);
5898 t2 = gen_reg_rtx (V4SImode);
5899
5900 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5901 GEN_INT (0),
5902 GEN_INT (2),
5903 GEN_INT (1),
5904 GEN_INT (3)));
5905 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5906 GEN_INT (0),
5907 GEN_INT (2),
5908 GEN_INT (1),
5909 GEN_INT (3)));
5910 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
5911 DONE;
5912 })
5913
;; Unsigned widening multiply V4SI x V4SI -> V2DI, "hi" variant.  Each input
;; is interleaved with itself via vec_interleave_highv4si (presumably
;; placing the two upper source elements in slots 0 and 2 -- confirm
;; against that pattern), and the results are fed to sse2_umulv2siv2di3,
;; which multiplies elements 0 and 2.
5914 (define_expand "vec_widen_umult_hi_v4si"
5915 [(match_operand:V2DI 0 "register_operand" "")
5916 (match_operand:V4SI 1 "register_operand" "")
5917 (match_operand:V4SI 2 "register_operand" "")]
5918 "TARGET_SSE2"
5919 {
5920 rtx op1, op2, t1, t2;
5921
5922 op1 = operands[1];
5923 op2 = operands[2];
5924 t1 = gen_reg_rtx (V4SImode);
5925 t2 = gen_reg_rtx (V4SImode);
5926
5927 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5928 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5929 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
5930 DONE;
5931 })
5932
;; Unsigned widening multiply V4SI x V4SI -> V2DI, "lo" variant.  Each input
;; is interleaved with itself via vec_interleave_lowv4si (presumably placing
;; the two lower source elements in slots 0 and 2 -- confirm against that
;; pattern), and the results are fed to sse2_umulv2siv2di3.
5933 (define_expand "vec_widen_umult_lo_v4si"
5934 [(match_operand:V2DI 0 "register_operand" "")
5935 (match_operand:V4SI 1 "register_operand" "")
5936 (match_operand:V4SI 2 "register_operand" "")]
5937 "TARGET_SSE2"
5938 {
5939 rtx op1, op2, t1, t2;
5940
5941 op1 = operands[1];
5942 op2 = operands[2];
5943 t1 = gen_reg_rtx (V4SImode);
5944 t2 = gen_reg_rtx (V4SImode);
5945
5946 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5947 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5948 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
5949 DONE;
5950 })
5951
;; Signed dot-product step: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2).  Operands 1 and 2 are V8HI; the accumulator (operand 3) and
;; the result are V4SI.
5952 (define_expand "sdot_prodv8hi"
5953 [(match_operand:V4SI 0 "register_operand" "")
5954 (match_operand:V8HI 1 "register_operand" "")
5955 (match_operand:V8HI 2 "register_operand" "")
5956 (match_operand:V4SI 3 "register_operand" "")]
5957 "TARGET_SSE2"
5958 {
5959 rtx t = gen_reg_rtx (V4SImode);
5960 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5961 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
5962 DONE;
5963 })
5964
;; Unsigned dot-product step with V4SI inputs and a V2DI accumulator
;; (operand 3).  The widening multiply only covers elements 0 and 2, so it
;; is issued twice: once on the inputs as given, and once on copies shifted
;; right by 32 bits as a whole TImode value.  Both products and the
;; accumulator are summed into operand 0.
5965 (define_expand "udot_prodv4si"
5966 [(match_operand:V2DI 0 "register_operand" "")
5967 (match_operand:V4SI 1 "register_operand" "")
5968 (match_operand:V4SI 2 "register_operand" "")
5969 (match_operand:V2DI 3 "register_operand" "")]
5970 "TARGET_SSE2"
5971 {
5972 rtx t1, t2, t3, t4;
5973
5974 t1 = gen_reg_rtx (V2DImode);
5975 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5976 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5977
5978 t2 = gen_reg_rtx (V4SImode);
5979 t3 = gen_reg_rtx (V4SImode);
5980 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
5981 gen_lowpart (TImode, operands[1]),
5982 GEN_INT (32)));
5983 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
5984 gen_lowpart (TImode, operands[2]),
5985 GEN_INT (32)));
5986
5987 t4 = gen_reg_rtx (V2DImode);
5988 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5989
5990 emit_insn (gen_addv2di3 (operands[0], t1, t4));
5991 DONE;
5992 })
5993
;; AVX arithmetic right shift for the SSEMODE24 modes (V8HI, V4SI):
;; three-operand vpsra<size>.  The SImode count may be a register or an
;; "N" constant; length_immediate is 1 only when the count is a const_int.
5994 (define_insn "*avx_ashr<mode>3"
5995 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5996 (ashiftrt:SSEMODE24
5997 (match_operand:SSEMODE24 1 "register_operand" "x")
5998 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5999 "TARGET_AVX"
6000 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6001 [(set_attr "type" "sseishft")
6002 (set_attr "prefix" "vex")
6003 (set (attr "length_immediate")
6004 (if_then_else (match_operand 2 "const_int_operand" "")
6005 (const_string "1")
6006 (const_string "0")))
6007 (set_attr "mode" "TI")])
6008
;; SSE2 arithmetic right shift for the SSEMODE24 modes (V8HI, V4SI):
;; two-operand psra<size> with operand 1 tied to the destination.
;; length_immediate is 1 only when the count is a const_int.
6009 (define_insn "ashr<mode>3"
6010 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6011 (ashiftrt:SSEMODE24
6012 (match_operand:SSEMODE24 1 "register_operand" "0")
6013 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6014 "TARGET_SSE2"
6015 "psra<ssevecsize>\t{%2, %0|%0, %2}"
6016 [(set_attr "type" "sseishft")
6017 (set_attr "prefix_data16" "1")
6018 (set (attr "length_immediate")
6019 (if_then_else (match_operand 2 "const_int_operand" "")
6020 (const_string "1")
6021 (const_string "0")))
6022 (set_attr "mode" "TI")])
6023
;; AVX logical right shift for the SSEMODE248 modes (V8HI, V4SI, V2DI):
;; three-operand vpsrl<size>.  length_immediate is 1 only when the count is
;; a const_int.
6024 (define_insn "*avx_lshr<mode>3"
6025 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6026 (lshiftrt:SSEMODE248
6027 (match_operand:SSEMODE248 1 "register_operand" "x")
6028 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6029 "TARGET_AVX"
6030 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6031 [(set_attr "type" "sseishft")
6032 (set_attr "prefix" "vex")
6033 (set (attr "length_immediate")
6034 (if_then_else (match_operand 2 "const_int_operand" "")
6035 (const_string "1")
6036 (const_string "0")))
6037 (set_attr "mode" "TI")])
6038
;; SSE2 logical right shift for the SSEMODE248 modes (V8HI, V4SI, V2DI):
;; two-operand psrl<size> with operand 1 tied to the destination.
6039 (define_insn "lshr<mode>3"
6040 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6041 (lshiftrt:SSEMODE248
6042 (match_operand:SSEMODE248 1 "register_operand" "0")
6043 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6044 "TARGET_SSE2"
6045 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
6046 [(set_attr "type" "sseishft")
6047 (set_attr "prefix_data16" "1")
6048 (set (attr "length_immediate")
6049 (if_then_else (match_operand 2 "const_int_operand" "")
6050 (const_string "1")
6051 (const_string "0")))
6052 (set_attr "mode" "TI")])
6053
;; AVX left shift for the SSEMODE248 modes (V8HI, V4SI, V2DI):
;; three-operand vpsll<size>.  length_immediate is 1 only when the count is
;; a const_int.
6054 (define_insn "*avx_ashl<mode>3"
6055 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6056 (ashift:SSEMODE248
6057 (match_operand:SSEMODE248 1 "register_operand" "x")
6058 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6059 "TARGET_AVX"
6060 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6061 [(set_attr "type" "sseishft")
6062 (set_attr "prefix" "vex")
6063 (set (attr "length_immediate")
6064 (if_then_else (match_operand 2 "const_int_operand" "")
6065 (const_string "1")
6066 (const_string "0")))
6067 (set_attr "mode" "TI")])
6068
;; SSE2 left shift for the SSEMODE248 modes (V8HI, V4SI, V2DI):
;; two-operand psll<size> with operand 1 tied to the destination.
6069 (define_insn "ashl<mode>3"
6070 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6071 (ashift:SSEMODE248
6072 (match_operand:SSEMODE248 1 "register_operand" "0")
6073 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6074 "TARGET_SSE2"
6075 "psll<ssevecsize>\t{%2, %0|%0, %2}"
6076 [(set_attr "type" "sseishft")
6077 (set_attr "prefix_data16" "1")
6078 (set (attr "length_immediate")
6079 (if_then_else (match_operand 2 "const_int_operand" "")
6080 (const_string "1")
6081 (const_string "0")))
6082 (set_attr "mode" "TI")])
6083
;; Whole-vector left shift for the 16-byte integer modes.  The shift is
;; expressed on the full 128-bit value: operands 0 and 1 are re-viewed in
;; TImode with gen_lowpart before the ashift:TI pattern is emitted.  The
;; count must satisfy const_0_to_255_mul_8_operand.
6084 (define_expand "vec_shl_<mode>"
6085 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6086 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
6087 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6088 "TARGET_SSE2"
6089 {
6090 operands[0] = gen_lowpart (TImode, operands[0]);
6091 operands[1] = gen_lowpart (TImode, operands[1]);
6092 })
6093
;; Whole-vector logical right shift for the 16-byte integer modes.  As with
;; vec_shl_<mode>, operands 0 and 1 are re-viewed in TImode so that the
;; lshiftrt:TI pattern applies to the full 128-bit value.
6094 (define_expand "vec_shr_<mode>"
6095 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6096 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
6097 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6098 "TARGET_SSE2"
6099 {
6100 operands[0] = gen_lowpart (TImode, operands[0]);
6101 operands[1] = gen_lowpart (TImode, operands[1]);
6102 })
6103
;; AVX integer min/max over the SSEMODE124 modes (V16QI, V8HI, V4SI), one
;; pattern per code in the maxmin iterator.  prefix_extra is "0" only for
;; the combinations singled out below -- V8HI for SMAX/SMIN and V16QI for
;; the other codes -- and "1" for everything else.
6104 (define_insn "*avx_<code><mode>3"
6105 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6106 (maxmin:SSEMODE124
6107 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6108 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6109 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6110 "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6111 [(set_attr "type" "sseiadd")
6112 (set (attr "prefix_extra")
6113 (if_then_else
6114 (ne (symbol_ref "<MODE>mode != ((<CODE> == SMAX || <CODE> == SMIN) ? V8HImode : V16QImode)")
6115 (const_int 0))
6116 (const_string "1")
6117 (const_string "0")))
6118 (set_attr "prefix" "vex")
6119 (set_attr "mode" "TI")])
6120
;; Expander for the unsigned V16QI min/max (umaxmin iterator), enabled for
;; SSE2; operands are fixed up before the pattern is emitted.
6121 (define_expand "<code>v16qi3"
6122 [(set (match_operand:V16QI 0 "register_operand" "")
6123 (umaxmin:V16QI
6124 (match_operand:V16QI 1 "nonimmediate_operand" "")
6125 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
6126 "TARGET_SSE2"
6127 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
6128
;; SSE2 unsigned V16QI min/max: two-operand p<max|min>b with operand 1 tied
;; to the destination.
6129 (define_insn "*<code>v16qi3"
6130 [(set (match_operand:V16QI 0 "register_operand" "=x")
6131 (umaxmin:V16QI
6132 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
6133 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
6134 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6135 "p<maxminiprefix>b\t{%2, %0|%0, %2}"
6136 [(set_attr "type" "sseiadd")
6137 (set_attr "prefix_data16" "1")
6138 (set_attr "mode" "TI")])
6139
;; Expander for the signed V8HI min/max (smaxmin iterator), enabled for
;; SSE2; operands are fixed up before the pattern is emitted.
6140 (define_expand "<code>v8hi3"
6141 [(set (match_operand:V8HI 0 "register_operand" "")
6142 (smaxmin:V8HI
6143 (match_operand:V8HI 1 "nonimmediate_operand" "")
6144 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6145 "TARGET_SSE2"
6146 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
6147
;; SSE2 signed V8HI min/max: two-operand p<max|min>w with operand 1 tied to
;; the destination.
6148 (define_insn "*<code>v8hi3"
6149 [(set (match_operand:V8HI 0 "register_operand" "=x")
6150 (smaxmin:V8HI
6151 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
6152 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
6153 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6154 "p<maxminiprefix>w\t{%2, %0|%0, %2}"
6155 [(set_attr "type" "sseiadd")
6156 (set_attr "prefix_data16" "1")
6157 (set_attr "mode" "TI")])
6158
;; Unsigned V8HI maximum.  With SSE4.1 the operands are fixed up and the
;; umax pattern is emitted directly.  Otherwise the result is synthesized
;; as ussub (op1, op2) + op2, using sse2_ussubv8hi3 (presumably an unsigned
;; saturating subtract, going by its name) followed by a plain add.
6159 (define_expand "umaxv8hi3"
6160 [(set (match_operand:V8HI 0 "register_operand" "")
6161 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
6162 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6163 "TARGET_SSE2"
6164 {
6165 if (TARGET_SSE4_1)
6166 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
6167 else
6168 {
6169 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
/* op2 is read again by the add below, so the intermediate result must
   not be written over it: use a fresh pseudo when op0 and op2 coincide. */
6170 if (rtx_equal_p (op3, op2))
6171 op3 = gen_reg_rtx (V8HImode);
6172 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6173 emit_insn (gen_addv8hi3 (op0, op3, op2));
6174 DONE;
6175 }
6176 })
6177
;; Signed maximum for the SSEMODE14 modes (V16QI, V4SI).  With SSE4.1 the
;; smax pattern is emitted directly after operand fixup.  Otherwise the
;; operation is handed to ix86_expand_int_vcond as a conditional select:
;; compare op1 > op2 (signed GT) and choose between op1 and op2.
6178 (define_expand "smax<mode>3"
6179 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6180 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6181 (match_operand:SSEMODE14 2 "register_operand" "")))]
6182 "TARGET_SSE2"
6183 {
6184 if (TARGET_SSE4_1)
6185 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
6186 else
6187 {
6188 rtx xops[6];
6189 bool ok;
6190
/* Presumably the vcond operand layout: [0] destination, [1]/[2] the two
   values to select between, [3] the comparison, [4]/[5] its operands --
   confirm against ix86_expand_int_vcond. */
6191 xops[0] = operands[0];
6192 xops[1] = operands[1];
6193 xops[2] = operands[2];
6194 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6195 xops[4] = operands[1];
6196 xops[5] = operands[2];
6197 ok = ix86_expand_int_vcond (xops);
6198 gcc_assert (ok);
6199 DONE;
6200 }
6201 })
6202
;; SSE4.1 signed min/max for the SSEMODE14 modes (V16QI, V4SI): two-operand
;; p<max|min><size> with operand 1 tied to the destination.
6203 (define_insn "*sse4_1_<code><mode>3"
6204 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
6205 (smaxmin:SSEMODE14
6206 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
6207 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
6208 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6209 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
6210 [(set_attr "type" "sseiadd")
6211 (set_attr "prefix_extra" "1")
6212 (set_attr "mode" "TI")])
6213
;; Unsigned V4SI maximum.  With SSE4.1 the umax pattern is emitted directly
;; after operand fixup.  Otherwise the operation is handed to
;; ix86_expand_int_vcond as a conditional select on the unsigned comparison
;; op1 > op2 (GTU), choosing between op1 and op2.
6214 (define_expand "umaxv4si3"
6215 [(set (match_operand:V4SI 0 "register_operand" "")
6216 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
6217 (match_operand:V4SI 2 "register_operand" "")))]
6218 "TARGET_SSE2"
6219 {
6220 if (TARGET_SSE4_1)
6221 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
6222 else
6223 {
6224 rtx xops[6];
6225 bool ok;
6226
6227 xops[0] = operands[0];
6228 xops[1] = operands[1];
6229 xops[2] = operands[2];
6230 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6231 xops[4] = operands[1];
6232 xops[5] = operands[2];
6233 ok = ix86_expand_int_vcond (xops);
6234 gcc_assert (ok);
6235 DONE;
6236 }
6237 })
6238
;; SSE4.1 unsigned min/max for the SSEMODE24 modes (V8HI, V4SI):
;; two-operand p<max|min><size> with operand 1 tied to the destination.
6239 (define_insn "*sse4_1_<code><mode>3"
6240 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6241 (umaxmin:SSEMODE24
6242 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
6243 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
6244 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6245 "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
6246 [(set_attr "type" "sseiadd")
6247 (set_attr "prefix_extra" "1")
6248 (set_attr "mode" "TI")])
6249
;; Signed minimum for the SSEMODE14 modes (V16QI, V4SI).  With SSE4.1 the
;; smin pattern is emitted directly after operand fixup.  Otherwise it is
;; expanded through ix86_expand_int_vcond using the same op1 > op2 (GT)
;; comparison as the smax expander, but with the two selected values
;; swapped (xops[1] = op2, xops[2] = op1).
6250 (define_expand "smin<mode>3"
6251 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6252 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6253 (match_operand:SSEMODE14 2 "register_operand" "")))]
6254 "TARGET_SSE2"
6255 {
6256 if (TARGET_SSE4_1)
6257 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
6258 else
6259 {
6260 rtx xops[6];
6261 bool ok;
6262
6263 xops[0] = operands[0];
6264 xops[1] = operands[2];
6265 xops[2] = operands[1];
6266 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6267 xops[4] = operands[1];
6268 xops[5] = operands[2];
6269 ok = ix86_expand_int_vcond (xops);
6270 gcc_assert (ok);
6271 DONE;
6272 }
6273 })
6274
;; Expander for unsigned min on the SSEMODE24 modes (V8HI, V4SI).
;; With SSE4.1 only the operands are fixed up and the umin template itself
;; is emitted.  Otherwise the result is built as a vector conditional via
;; ix86_expand_int_vcond.  Note the swapped value operands (xops[1] = op2,
;; xops[2] = op1): op0 = (op1 >u op2) ? op2 : op1.
6275 (define_expand "umin<mode>3"
6276 [(set (match_operand:SSEMODE24 0 "register_operand" "")
6277 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
6278 (match_operand:SSEMODE24 2 "register_operand" "")))]
6279 "TARGET_SSE2"
6280 {
6281 if (TARGET_SSE4_1)
6282 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
6283 else
6284 {
6285 rtx xops[6];
6286 bool ok;
6287
6288 xops[0] = operands[0];
6289 xops[1] = operands[2];
6290 xops[2] = operands[1];
6291 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6292 xops[4] = operands[1];
6293 xops[5] = operands[2];
6294 ok = ix86_expand_int_vcond (xops);
6295 gcc_assert (ok);
6296 DONE;
6297 }
6298 })
6299
6300 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6301 ;;
6302 ;; Parallel integral comparisons
6303 ;;
6304 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6305
;; Named expander for element-wise equality on the SSEMODE124 modes
;; (V16QI, V8HI, V4SI).  Enabled for SSE2 when XOP is not; its preparation
;; statement only fixes up the operands before the eq template is emitted.
6306 (define_expand "sse2_eq<mode>3"
6307 [(set (match_operand:SSEMODE124 0 "register_operand" "")
6308 (eq:SSEMODE124
6309 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
6310 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
6311 "TARGET_SSE2 && !TARGET_XOP "
6312 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
6313
;; AVX (VEX, three-operand) element-wise equality compare over the
;; SSEMODE1248 modes (iterator defined outside this chunk).  Operand 1 is
;; not tied to the destination.  prefix_extra is "1" only when operand 0
;; is V2DI, otherwise it keeps the "*" default.
6314 (define_insn "*avx_eq<mode>3"
6315 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6316 (eq:SSEMODE1248
6317 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
6318 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6319 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6320 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6321 [(set_attr "type" "ssecmp")
6322 (set (attr "prefix_extra")
6323 (if_then_else (match_operand:V2DI 0 "" "")
6324 (const_string "1")
6325 (const_string "*")))
6326 (set_attr "prefix" "vex")
6327 (set_attr "mode" "TI")])
6328
;; SSE2 (two-operand) element-wise equality compare on the SSEMODE124 modes
;; (V16QI, V8HI, V4SI); disabled when XOP is enabled.  Operand 1 is tied to
;; the destination ("%0", commutative with operand 2).
6329 (define_insn "*sse2_eq<mode>3"
6330 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6331 (eq:SSEMODE124
6332 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
6333 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6334 "TARGET_SSE2 && !TARGET_XOP
6335 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6336 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
6337 [(set_attr "type" "ssecmp")
6338 (set_attr "prefix_data16" "1")
6339 (set_attr "mode" "TI")])
6340
;; Named expander for V2DI element-wise equality, enabled by SSE4.1.
;; The preparation statement only fixes up the operands before the eq
;; template is emitted.
6341 (define_expand "sse4_1_eqv2di3"
6342 [(set (match_operand:V2DI 0 "register_operand" "")
6343 (eq:V2DI
6344 (match_operand:V2DI 1 "nonimmediate_operand" "")
6345 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6346 "TARGET_SSE4_1"
6347 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
6348
;; SSE4.1 two-operand V2DI equality compare (pcmpeqq).  Operand 1 is tied
;; to the destination ("%0", commutative with operand 2).
6349 (define_insn "*sse4_1_eqv2di3"
6350 [(set (match_operand:V2DI 0 "register_operand" "=x")
6351 (eq:V2DI
6352 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
6353 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6354 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6355 "pcmpeqq\t{%2, %0|%0, %2}"
6356 [(set_attr "type" "ssecmp")
6357 (set_attr "prefix_extra" "1")
6358 (set_attr "mode" "TI")])
6359
;; AVX (VEX, three-operand) signed greater-than compare over the SSEMODE1248
;; modes (iterator defined outside this chunk).  Not commutative: operand 1
;; must be a register, operand 2 may be register or memory.  prefix_extra
;; is "1" only when operand 0 is V2DI.
6360 (define_insn "*avx_gt<mode>3"
6361 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6362 (gt:SSEMODE1248
6363 (match_operand:SSEMODE1248 1 "register_operand" "x")
6364 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6365 "TARGET_AVX"
6366 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6367 [(set_attr "type" "ssecmp")
6368 (set (attr "prefix_extra")
6369 (if_then_else (match_operand:V2DI 0 "" "")
6370 (const_string "1")
6371 (const_string "*")))
6372 (set_attr "prefix" "vex")
6373 (set_attr "mode" "TI")])
6374
;; SSE2 (two-operand) signed greater-than compare on the SSEMODE124 modes
;; (V16QI, V8HI, V4SI); disabled when XOP is enabled.  Operand 1 is tied to
;; the destination and, unlike the eq patterns, is not marked commutative.
6375 (define_insn "sse2_gt<mode>3"
6376 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6377 (gt:SSEMODE124
6378 (match_operand:SSEMODE124 1 "register_operand" "0")
6379 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6380 "TARGET_SSE2 && !TARGET_XOP"
6381 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
6382 [(set_attr "type" "ssecmp")
6383 (set_attr "prefix_data16" "1")
6384 (set_attr "mode" "TI")])
6385
;; SSE4.2 two-operand V2DI signed greater-than compare (pcmpgtq).
;; Operand 1 is tied to the destination; operand 2 may be register or memory.
6386 (define_insn "sse4_2_gtv2di3"
6387 [(set (match_operand:V2DI 0 "register_operand" "=x")
6388 (gt:V2DI
6389 (match_operand:V2DI 1 "register_operand" "0")
6390 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6391 "TARGET_SSE4_2"
6392 "pcmpgtq\t{%2, %0|%0, %2}"
6393 [(set_attr "type" "ssecmp")
6394 (set_attr "prefix_extra" "1")
6395 (set_attr "mode" "TI")])
6396
;; Vector conditional expander over the SSEMODE124C8 modes (iterator defined
;; outside this chunk): op0 = (op4 <op3> op5) ? op1 : op2.
;; All work is delegated to ix86_expand_int_vcond, which is asserted to
;; succeed; the RTL template itself is never emitted (DONE).
6397 (define_expand "vcond<mode>"
6398 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6399 (if_then_else:SSEMODE124C8
6400 (match_operator 3 ""
6401 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6402 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6403 (match_operand:SSEMODE124C8 1 "general_operand" "")
6404 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6405 "TARGET_SSE2"
6406 {
6407 bool ok = ix86_expand_int_vcond (operands);
6408 gcc_assert (ok);
6409 DONE;
6410 })
6411
;; Vector conditional expander registered under the vcondu name, over the
;; SSEMODE124C8 modes (iterator defined outside this chunk).  Its template
;; and body are identical to vcond<mode>: op0 = (op4 <op3> op5) ? op1 : op2,
;; fully delegated to ix86_expand_int_vcond, which is asserted to succeed.
6412 (define_expand "vcondu<mode>"
6413 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
6414 (if_then_else:SSEMODE124C8
6415 (match_operator 3 ""
6416 [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
6417 (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
6418 (match_operand:SSEMODE124C8 1 "general_operand" "")
6419 (match_operand:SSEMODE124C8 2 "general_operand" "")))]
6420 "TARGET_SSE2"
6421 {
6422 bool ok = ix86_expand_int_vcond (operands);
6423 gcc_assert (ok);
6424 DONE;
6425 })
6426
6427 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6428 ;;
6429 ;; Parallel bitwise logical operations
6430 ;;
6431 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6432
;; One's complement for the SSEMODEI modes (V16QI, V8HI, V4SI, V2DI),
;; expressed as an XOR with an all-ones vector.  The preparation code builds
;; a CONST_VECTOR whose every element is constm1_rtx, forces it into a
;; register and supplies it as operand 2 for the (match_dup 2).
6433 (define_expand "one_cmpl<mode>2"
6434 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6435 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6436 (match_dup 2)))]
6437 "TARGET_SSE2"
6438 {
6439 int i, n = GET_MODE_NUNITS (<MODE>mode);
6440 rtvec v = rtvec_alloc (n);
6441
6442 for (i = 0; i < n; ++i)
6443 RTVEC_ELT (v, i) = constm1_rtx;
6444
6445 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
6446 })
6447
;; AVX and-not for the 256-bit integer modes in AVX256MODEI (V32QI, V16HI,
;; V8SI, V4DI): op0 = ~op1 & op2.  It is emitted with the packed-single
;; mnemonic vandnps, and the insn "mode" attribute comes from <avxvecpsmode>.
6448 (define_insn "*avx_andnot<mode>3"
6449 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6450 (and:AVX256MODEI
6451 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6452 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6453 "TARGET_AVX"
6454 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6455 [(set_attr "type" "sselog")
6456 (set_attr "prefix" "vex")
6457 (set_attr "mode" "<avxvecpsmode>")])
6458
;; and-not for the SSEMODEI integer modes on targets with SSE but without
;; SSE2: op0 = ~op1 & op2, emitted as andnps (mode attribute V4SF).
;; Operand 1 is tied to the destination.
6459 (define_insn "*sse_andnot<mode>3"
6460 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6461 (and:SSEMODEI
6462 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6463 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6464 "(TARGET_SSE && !TARGET_SSE2)"
6465 "andnps\t{%2, %0|%0, %2}"
6466 [(set_attr "type" "sselog")
6467 (set_attr "mode" "V4SF")])
6468
;; AVX (VEX, three-operand) and-not for the 128-bit SSEMODEI integer modes:
;; op0 = ~op1 & op2, emitted as vpandn.  Operand 1 is not tied to the
;; destination.
6469 (define_insn "*avx_andnot<mode>3"
6470 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6471 (and:SSEMODEI
6472 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6473 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6474 "TARGET_AVX"
6475 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6476 [(set_attr "type" "sselog")
6477 (set_attr "prefix" "vex")
6478 (set_attr "mode" "TI")])
6479
;; SSE2 (two-operand) and-not for the SSEMODEI integer modes:
;; op0 = ~op1 & op2, emitted as pandn.  Operand 1 is tied to the destination.
6480 (define_insn "sse2_andnot<mode>3"
6481 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6482 (and:SSEMODEI
6483 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6484 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6485 "TARGET_SSE2"
6486 "pandn\t{%2, %0|%0, %2}"
6487 [(set_attr "type" "sselog")
6488 (set_attr "prefix_data16" "1")
6489 (set_attr "mode" "TI")])
6490
;; SSE2 and-not on a TFmode value held in an SSE register:
;; op0 = ~op1 & op2, emitted as pandn.  Operand 1 is tied to the destination.
6491 (define_insn "*andnottf3"
6492 [(set (match_operand:TF 0 "register_operand" "=x")
6493 (and:TF
6494 (not:TF (match_operand:TF 1 "register_operand" "0"))
6495 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6496 "TARGET_SSE2"
6497 "pandn\t{%2, %0|%0, %2}"
6498 [(set_attr "type" "sselog")
6499 (set_attr "prefix_data16" "1")
6500 (set_attr "mode" "TI")])
6501
;; Named expander for the plogic code iterator (defined outside this chunk)
;; over the SSEMODEI integer modes.  Enabled for plain SSE; the preparation
;; statement only fixes up the operands before the template is emitted.
6502 (define_expand "<code><mode>3"
6503 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6504 (plogic:SSEMODEI
6505 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6506 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6507 "TARGET_SSE"
6508 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
6509
;; AVX plogic operation for the 256-bit integer modes in AVX256MODEI.
;; It is emitted with the packed-single mnemonic v<plogicprefix>ps, and the
;; insn "mode" attribute comes from <avxvecpsmode>.  Operands 1 and 2 are
;; commutative ("%x").
6510 (define_insn "*avx_<code><mode>3"
6511 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6512 (plogic:AVX256MODEI
6513 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6514 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6515 "TARGET_AVX
6516 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6517 "v<plogicprefix>ps\t{%2, %1, %0|%0, %1, %2}"
6518 [(set_attr "type" "sselog")
6519 (set_attr "prefix" "vex")
6520 (set_attr "mode" "<avxvecpsmode>")])
6521
;; plogic operation for the SSEMODEI integer modes on targets with SSE but
;; without SSE2, emitted as <plogicprefix>ps (mode attribute V4SF).
;; Operand 1 is tied to the destination ("%0", commutative with operand 2).
6522 (define_insn "*sse_<code><mode>3"
6523 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6524 (plogic:SSEMODEI
6525 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6526 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6527 "(TARGET_SSE && !TARGET_SSE2)
6528 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6529 "<plogicprefix>ps\t{%2, %0|%0, %2}"
6530 [(set_attr "type" "sselog")
6531 (set_attr "mode" "V4SF")])
6532
;; AVX (VEX, three-operand) plogic operation for the 128-bit SSEMODEI
;; integer modes, emitted as vp<plogicprefix>.  Operands 1 and 2 are
;; commutative ("%x") and operand 1 is not tied to the destination.
6533 (define_insn "*avx_<code><mode>3"
6534 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6535 (plogic:SSEMODEI
6536 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6537 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6538 "TARGET_AVX
6539 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6540 "vp<plogicprefix>\t{%2, %1, %0|%0, %1, %2}"
6541 [(set_attr "type" "sselog")
6542 (set_attr "prefix" "vex")
6543 (set_attr "mode" "TI")])
6544
;; SSE2 (two-operand) plogic operation for the SSEMODEI integer modes,
;; emitted as p<plogicprefix>.  Operand 1 is tied to the destination
;; ("%0", commutative with operand 2).
6545 (define_insn "*sse2_<code><mode>3"
6546 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6547 (plogic:SSEMODEI
6548 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6549 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6550 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6551 "p<plogicprefix>\t{%2, %0|%0, %2}"
6552 [(set_attr "type" "sselog")
6553 (set_attr "prefix_data16" "1")
6554 (set_attr "mode" "TI")])
6555
;; Named expander for the plogic code iterator on TFmode, enabled by SSE2.
;; The preparation statement only fixes up the operands before the template
;; is emitted.
6556 (define_expand "<code>tf3"
6557 [(set (match_operand:TF 0 "register_operand" "")
6558 (plogic:TF
6559 (match_operand:TF 1 "nonimmediate_operand" "")
6560 (match_operand:TF 2 "nonimmediate_operand" "")))]
6561 "TARGET_SSE2"
6562 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
6563
;; SSE2 plogic operation on a TFmode value held in an SSE register, emitted
;; as p<plogicprefix>.  Operand 1 is tied to the destination ("%0",
;; commutative with operand 2).
6564 (define_insn "*<code>tf3"
6565 [(set (match_operand:TF 0 "register_operand" "=x")
6566 (plogic:TF
6567 (match_operand:TF 1 "nonimmediate_operand" "%0")
6568 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6569 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6570 "p<plogicprefix>\t{%2, %0|%0, %2}"
6571 [(set_attr "type" "sselog")
6572 (set_attr "prefix_data16" "1")
6573 (set_attr "mode" "TI")])
6574
6575 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6576 ;;
6577 ;; Parallel integral element swizzling
6578 ;;
6579 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6580
;; Pack two V8HI operands into one V16QI result.  Both inputs are viewed as
;; V16QI via gen_lowpart and then run through seven byte interleaves (three
;; high/low rounds plus a final low interleave).  The trace below labels
;; each byte with a letter and shows the value after every step.
6581 ;; Reduce:
6582 ;; op1 = abcdefghijklmnop
6583 ;; op2 = qrstuvwxyz012345
6584 ;; h1 = aqbrcsdteufvgwhx
6585 ;; l1 = iyjzk0l1m2n3o4p5
6586 ;; h2 = aiqybjrzcks0dlt1
6587 ;; l2 = emu2fnv3gow4hpx5
6588 ;; h3 = aeimquy2bfjnrvz3
6589 ;; l3 = cgkosw04dhlptx15
6590 ;; result = bdfhjlnprtvxz135
6591 (define_expand "vec_pack_trunc_v8hi"
6592 [(match_operand:V16QI 0 "register_operand" "")
6593 (match_operand:V8HI 1 "register_operand" "")
6594 (match_operand:V8HI 2 "register_operand" "")]
6595 "TARGET_SSE2"
6596 {
6597 rtx op1, op2, h1, l1, h2, l2, h3, l3;
6598
6599 op1 = gen_lowpart (V16QImode, operands[1]);
6600 op2 = gen_lowpart (V16QImode, operands[2]);
6601 h1 = gen_reg_rtx (V16QImode);
6602 l1 = gen_reg_rtx (V16QImode);
6603 h2 = gen_reg_rtx (V16QImode);
6604 l2 = gen_reg_rtx (V16QImode);
6605 h3 = gen_reg_rtx (V16QImode);
6606 l3 = gen_reg_rtx (V16QImode);
6607
6608 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
6609 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
6610 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
6611 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
6612 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
6613 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
6614 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
6615 DONE;
6616 })
6617
;; Pack two V4SI operands into one V8HI result.  Both inputs are viewed as
;; V8HI via gen_lowpart and then run through five halfword interleaves (two
;; high/low rounds plus a final low interleave).  The trace below labels
;; each halfword with a letter and shows the value after every step.
6618 ;; Reduce:
6619 ;; op1 = abcdefgh
6620 ;; op2 = ijklmnop
6621 ;; h1 = aibjckdl
6622 ;; l1 = emfngohp
6623 ;; h2 = aeimbfjn
6624 ;; l2 = cgkodhlp
6625 ;; result = bdfhjlnp
6626 (define_expand "vec_pack_trunc_v4si"
6627 [(match_operand:V8HI 0 "register_operand" "")
6628 (match_operand:V4SI 1 "register_operand" "")
6629 (match_operand:V4SI 2 "register_operand" "")]
6630 "TARGET_SSE2"
6631 {
6632 rtx op1, op2, h1, l1, h2, l2;
6633
6634 op1 = gen_lowpart (V8HImode, operands[1]);
6635 op2 = gen_lowpart (V8HImode, operands[2]);
6636 h1 = gen_reg_rtx (V8HImode);
6637 l1 = gen_reg_rtx (V8HImode);
6638 h2 = gen_reg_rtx (V8HImode);
6639 l2 = gen_reg_rtx (V8HImode);
6640
6641 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
6642 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
6643 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
6644 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
6645 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
6646 DONE;
6647 })
6648
;; Pack two V2DI operands into one V4SI result.  Both inputs are viewed as
;; V4SI via gen_lowpart and then run through three doubleword interleaves
;; (one high/low round plus a final low interleave).  The trace below labels
;; each doubleword with a letter and shows the value after every step.
6649 ;; Reduce:
6650 ;; op1 = abcd
6651 ;; op2 = efgh
6652 ;; h1 = aebf
6653 ;; l1 = cgdh
6654 ;; result = bdfh
6655 (define_expand "vec_pack_trunc_v2di"
6656 [(match_operand:V4SI 0 "register_operand" "")
6657 (match_operand:V2DI 1 "register_operand" "")
6658 (match_operand:V2DI 2 "register_operand" "")]
6659 "TARGET_SSE2"
6660 {
6661 rtx op1, op2, h1, l1;
6662
6663 op1 = gen_lowpart (V4SImode, operands[1]);
6664 op2 = gen_lowpart (V4SImode, operands[2]);
6665 h1 = gen_reg_rtx (V4SImode);
6666 l1 = gen_reg_rtx (V4SImode);
6667
6668 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
6669 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
6670 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
6671 DONE;
6672 })
6673
;; Interleave the high halves of two V16QI vectors: elements 8..15 of
;; operand 1 alternate with elements 8..15 of operand 2 (indices 24..31 of
;; the concatenation).  Implemented by emitting sse2_punpckhbw.
6674 (define_expand "vec_interleave_highv16qi"
6675 [(set (match_operand:V16QI 0 "register_operand" "")
6676 (vec_select:V16QI
6677 (vec_concat:V32QI
6678 (match_operand:V16QI 1 "register_operand" "")
6679 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6680 (parallel [(const_int 8) (const_int 24)
6681 (const_int 9) (const_int 25)
6682 (const_int 10) (const_int 26)
6683 (const_int 11) (const_int 27)
6684 (const_int 12) (const_int 28)
6685 (const_int 13) (const_int 29)
6686 (const_int 14) (const_int 30)
6687 (const_int 15) (const_int 31)])))]
6688 "TARGET_SSE2"
6689 {
6690 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
6691 DONE;
6692 })
6693
;; Interleave the low halves of two V16QI vectors: elements 0..7 of
;; operand 1 alternate with elements 0..7 of operand 2 (indices 16..23 of
;; the concatenation).  Implemented by emitting sse2_punpcklbw.
6694 (define_expand "vec_interleave_lowv16qi"
6695 [(set (match_operand:V16QI 0 "register_operand" "")
6696 (vec_select:V16QI
6697 (vec_concat:V32QI
6698 (match_operand:V16QI 1 "register_operand" "")
6699 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6700 (parallel [(const_int 0) (const_int 16)
6701 (const_int 1) (const_int 17)
6702 (const_int 2) (const_int 18)
6703 (const_int 3) (const_int 19)
6704 (const_int 4) (const_int 20)
6705 (const_int 5) (const_int 21)
6706 (const_int 6) (const_int 22)
6707 (const_int 7) (const_int 23)])))]
6708 "TARGET_SSE2"
6709 {
6710 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
6711 DONE;
6712 })
6713
;; Interleave the high halves of two V8HI vectors: elements 4..7 of
;; operand 1 alternate with elements 4..7 of operand 2 (indices 12..15 of
;; the concatenation).  Implemented by emitting sse2_punpckhwd.
;; Operand 0 uses an empty constraint string like every other expander
;; here; constraints carry no meaning in a define_expand.
6714 (define_expand "vec_interleave_highv8hi"
6715 [(set (match_operand:V8HI 0 "register_operand" "")
6716 (vec_select:V8HI
6717 (vec_concat:V16HI
6718 (match_operand:V8HI 1 "register_operand" "")
6719 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6720 (parallel [(const_int 4) (const_int 12)
6721 (const_int 5) (const_int 13)
6722 (const_int 6) (const_int 14)
6723 (const_int 7) (const_int 15)])))]
6724 "TARGET_SSE2"
6725 {
6726 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
6727 DONE;
6728 })
6729
;; Interleave the low halves of two V8HI vectors: elements 0..3 of
;; operand 1 alternate with elements 0..3 of operand 2 (indices 8..11 of
;; the concatenation).  Implemented by emitting sse2_punpcklwd.
6730 (define_expand "vec_interleave_lowv8hi"
6731 [(set (match_operand:V8HI 0 "register_operand" "")
6732 (vec_select:V8HI
6733 (vec_concat:V16HI
6734 (match_operand:V8HI 1 "register_operand" "")
6735 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6736 (parallel [(const_int 0) (const_int 8)
6737 (const_int 1) (const_int 9)
6738 (const_int 2) (const_int 10)
6739 (const_int 3) (const_int 11)])))]
6740 "TARGET_SSE2"
6741 {
6742 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
6743 DONE;
6744 })
6745
;; Interleave the high halves of two V4SI vectors: elements 2..3 of
;; operand 1 alternate with elements 2..3 of operand 2 (indices 6..7 of
;; the concatenation).  Implemented by emitting sse2_punpckhdq.
6746 (define_expand "vec_interleave_highv4si"
6747 [(set (match_operand:V4SI 0 "register_operand" "")
6748 (vec_select:V4SI
6749 (vec_concat:V8SI
6750 (match_operand:V4SI 1 "register_operand" "")
6751 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6752 (parallel [(const_int 2) (const_int 6)
6753 (const_int 3) (const_int 7)])))]
6754 "TARGET_SSE2"
6755 {
6756 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
6757 DONE;
6758 })
6759
;; Interleave the low halves of two V4SI vectors: elements 0..1 of
;; operand 1 alternate with elements 0..1 of operand 2 (indices 4..5 of
;; the concatenation).  Implemented by emitting sse2_punpckldq.
6760 (define_expand "vec_interleave_lowv4si"
6761 [(set (match_operand:V4SI 0 "register_operand" "")
6762 (vec_select:V4SI
6763 (vec_concat:V8SI
6764 (match_operand:V4SI 1 "register_operand" "")
6765 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6766 (parallel [(const_int 0) (const_int 4)
6767 (const_int 1) (const_int 5)])))]
6768 "TARGET_SSE2"
6769 {
6770 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
6771 DONE;
6772 })
6773
;; Select the high element of each V2DI operand: element 1 of operand 1
;; followed by element 1 of operand 2 (index 3 of the concatenation).
;; Implemented by emitting sse2_punpckhqdq, which is defined outside this
;; chunk.
6774 (define_expand "vec_interleave_highv2di"
6775 [(set (match_operand:V2DI 0 "register_operand" "")
6776 (vec_select:V2DI
6777 (vec_concat:V4DI
6778 (match_operand:V2DI 1 "register_operand" "")
6779 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6780 (parallel [(const_int 1)
6781 (const_int 3)])))]
6782 "TARGET_SSE2"
6783 {
6784 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
6785 DONE;
6786 })
6787
;; Select the low element of each V2DI operand: element 0 of operand 1
;; followed by element 0 of operand 2 (index 2 of the concatenation).
;; Implemented by emitting sse2_punpcklqdq, which is defined outside this
;; chunk.
6788 (define_expand "vec_interleave_lowv2di"
6789 [(set (match_operand:V2DI 0 "register_operand" "")
6790 (vec_select:V2DI
6791 (vec_concat:V4DI
6792 (match_operand:V2DI 1 "register_operand" "")
6793 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6794 (parallel [(const_int 0)
6795 (const_int 2)])))]
6796 "TARGET_SSE2"
6797 {
6798 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
6799 DONE;
6800 })
6801
;; Interleave the high halves of two V4SF vectors (elements 2..3 of each
;; operand).  There is no preparation code: the vec_select template is
;; emitted as written and must be matched by an insn defined elsewhere.
6802 (define_expand "vec_interleave_highv4sf"
6803 [(set (match_operand:V4SF 0 "register_operand" "")
6804 (vec_select:V4SF
6805 (vec_concat:V8SF
6806 (match_operand:V4SF 1 "register_operand" "")
6807 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6808 (parallel [(const_int 2) (const_int 6)
6809 (const_int 3) (const_int 7)])))]
6810 "TARGET_SSE")
6811
;; Interleave the low halves of two V4SF vectors (elements 0..1 of each
;; operand).  There is no preparation code: the vec_select template is
;; emitted as written and must be matched by an insn defined elsewhere.
6812 (define_expand "vec_interleave_lowv4sf"
6813 [(set (match_operand:V4SF 0 "register_operand" "")
6814 (vec_select:V4SF
6815 (vec_concat:V8SF
6816 (match_operand:V4SF 1 "register_operand" "")
6817 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6818 (parallel [(const_int 0) (const_int 4)
6819 (const_int 1) (const_int 5)])))]
6820 "TARGET_SSE")
6821
;; Select the high element of each V2DF operand (element 1 of operand 1,
;; then element 1 of operand 2).  There is no preparation code: the template
;; is emitted as written and must be matched by an insn defined elsewhere.
6822 (define_expand "vec_interleave_highv2df"
6823 [(set (match_operand:V2DF 0 "register_operand" "")
6824 (vec_select:V2DF
6825 (vec_concat:V4DF
6826 (match_operand:V2DF 1 "register_operand" "")
6827 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6828 (parallel [(const_int 1)
6829 (const_int 3)])))]
6830 "TARGET_SSE2")
6831
;; Select the low element of each V2DF operand (element 0 of operand 1,
;; then element 0 of operand 2).  There is no preparation code: the template
;; is emitted as written and must be matched by an insn defined elsewhere.
6832 (define_expand "vec_interleave_lowv2df"
6833 [(set (match_operand:V2DF 0 "register_operand" "")
6834 (vec_select:V2DF
6835 (vec_concat:V4DF
6836 (match_operand:V2DF 1 "register_operand" "")
6837 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6838 (parallel [(const_int 0)
6839 (const_int 2)])))]
6840 "TARGET_SSE2")
6841
;; AVX (VEX, three-operand) pack with signed saturation: each V8HI operand
;; is ss_truncate'd to V8QI and the two halves are concatenated into V16QI
;; (operand 1 first).
6842 (define_insn "*avx_packsswb"
6843 [(set (match_operand:V16QI 0 "register_operand" "=x")
6844 (vec_concat:V16QI
6845 (ss_truncate:V8QI
6846 (match_operand:V8HI 1 "register_operand" "x"))
6847 (ss_truncate:V8QI
6848 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6849 "TARGET_AVX"
6850 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6851 [(set_attr "type" "sselog")
6852 (set_attr "prefix" "vex")
6853 (set_attr "mode" "TI")])
6854
;; SSE2 (two-operand) pack with signed saturation: each V8HI operand is
;; ss_truncate'd to V8QI and the two halves are concatenated into V16QI.
;; Operand 1 is tied to the destination.
6855 (define_insn "sse2_packsswb"
6856 [(set (match_operand:V16QI 0 "register_operand" "=x")
6857 (vec_concat:V16QI
6858 (ss_truncate:V8QI
6859 (match_operand:V8HI 1 "register_operand" "0"))
6860 (ss_truncate:V8QI
6861 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6862 "TARGET_SSE2"
6863 "packsswb\t{%2, %0|%0, %2}"
6864 [(set_attr "type" "sselog")
6865 (set_attr "prefix_data16" "1")
6866 (set_attr "mode" "TI")])
6867
;; AVX (VEX, three-operand) pack with signed saturation: each V4SI operand
;; is ss_truncate'd to V4HI and the two halves are concatenated into V8HI
;; (operand 1 first).
6868 (define_insn "*avx_packssdw"
6869 [(set (match_operand:V8HI 0 "register_operand" "=x")
6870 (vec_concat:V8HI
6871 (ss_truncate:V4HI
6872 (match_operand:V4SI 1 "register_operand" "x"))
6873 (ss_truncate:V4HI
6874 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6875 "TARGET_AVX"
6876 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6877 [(set_attr "type" "sselog")
6878 (set_attr "prefix" "vex")
6879 (set_attr "mode" "TI")])
6880
;; SSE2 (two-operand) pack with signed saturation: each V4SI operand is
;; ss_truncate'd to V4HI and the two halves are concatenated into V8HI.
;; Operand 1 is tied to the destination.
6881 (define_insn "sse2_packssdw"
6882 [(set (match_operand:V8HI 0 "register_operand" "=x")
6883 (vec_concat:V8HI
6884 (ss_truncate:V4HI
6885 (match_operand:V4SI 1 "register_operand" "0"))
6886 (ss_truncate:V4HI
6887 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6888 "TARGET_SSE2"
6889 "packssdw\t{%2, %0|%0, %2}"
6890 [(set_attr "type" "sselog")
6891 (set_attr "prefix_data16" "1")
6892 (set_attr "mode" "TI")])
6893
;; AVX (VEX, three-operand) pack with unsigned saturation: each V8HI operand
;; is us_truncate'd to V8QI and the two halves are concatenated into V16QI
;; (operand 1 first).
6894 (define_insn "*avx_packuswb"
6895 [(set (match_operand:V16QI 0 "register_operand" "=x")
6896 (vec_concat:V16QI
6897 (us_truncate:V8QI
6898 (match_operand:V8HI 1 "register_operand" "x"))
6899 (us_truncate:V8QI
6900 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6901 "TARGET_AVX"
6902 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6903 [(set_attr "type" "sselog")
6904 (set_attr "prefix" "vex")
6905 (set_attr "mode" "TI")])
6906
;; SSE2 (two-operand) pack with unsigned saturation: each V8HI operand is
;; us_truncate'd to V8QI and the two halves are concatenated into V16QI.
;; Operand 1 is tied to the destination.
6907 (define_insn "sse2_packuswb"
6908 [(set (match_operand:V16QI 0 "register_operand" "=x")
6909 (vec_concat:V16QI
6910 (us_truncate:V8QI
6911 (match_operand:V8HI 1 "register_operand" "0"))
6912 (us_truncate:V8QI
6913 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6914 "TARGET_SSE2"
6915 "packuswb\t{%2, %0|%0, %2}"
6916 [(set_attr "type" "sselog")
6917 (set_attr "prefix_data16" "1")
6918 (set_attr "mode" "TI")])
6919
;; AVX (VEX, three-operand) high byte interleave: elements 8..15 of
;; operand 1 alternate with elements 8..15 of operand 2.  Operand 1 is not
;; tied to the destination.
6920 (define_insn "*avx_punpckhbw"
6921 [(set (match_operand:V16QI 0 "register_operand" "=x")
6922 (vec_select:V16QI
6923 (vec_concat:V32QI
6924 (match_operand:V16QI 1 "register_operand" "x")
6925 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6926 (parallel [(const_int 8) (const_int 24)
6927 (const_int 9) (const_int 25)
6928 (const_int 10) (const_int 26)
6929 (const_int 11) (const_int 27)
6930 (const_int 12) (const_int 28)
6931 (const_int 13) (const_int 29)
6932 (const_int 14) (const_int 30)
6933 (const_int 15) (const_int 31)])))]
6934 "TARGET_AVX"
6935 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6936 [(set_attr "type" "sselog")
6937 (set_attr "prefix" "vex")
6938 (set_attr "mode" "TI")])
6939
;; SSE2 (two-operand) high byte interleave: elements 8..15 of operand 1
;; alternate with elements 8..15 of operand 2.  Operand 1 is tied to the
;; destination.  Used by the vec_interleave_highv16qi expander.
6940 (define_insn "sse2_punpckhbw"
6941 [(set (match_operand:V16QI 0 "register_operand" "=x")
6942 (vec_select:V16QI
6943 (vec_concat:V32QI
6944 (match_operand:V16QI 1 "register_operand" "0")
6945 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6946 (parallel [(const_int 8) (const_int 24)
6947 (const_int 9) (const_int 25)
6948 (const_int 10) (const_int 26)
6949 (const_int 11) (const_int 27)
6950 (const_int 12) (const_int 28)
6951 (const_int 13) (const_int 29)
6952 (const_int 14) (const_int 30)
6953 (const_int 15) (const_int 31)])))]
6954 "TARGET_SSE2"
6955 "punpckhbw\t{%2, %0|%0, %2}"
6956 [(set_attr "type" "sselog")
6957 (set_attr "prefix_data16" "1")
6958 (set_attr "mode" "TI")])
6959
;; AVX (VEX, three-operand) low byte interleave: elements 0..7 of
;; operand 1 alternate with elements 0..7 of operand 2.  Operand 1 is not
;; tied to the destination.
6960 (define_insn "*avx_punpcklbw"
6961 [(set (match_operand:V16QI 0 "register_operand" "=x")
6962 (vec_select:V16QI
6963 (vec_concat:V32QI
6964 (match_operand:V16QI 1 "register_operand" "x")
6965 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6966 (parallel [(const_int 0) (const_int 16)
6967 (const_int 1) (const_int 17)
6968 (const_int 2) (const_int 18)
6969 (const_int 3) (const_int 19)
6970 (const_int 4) (const_int 20)
6971 (const_int 5) (const_int 21)
6972 (const_int 6) (const_int 22)
6973 (const_int 7) (const_int 23)])))]
6974 "TARGET_AVX"
6975 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6976 [(set_attr "type" "sselog")
6977 (set_attr "prefix" "vex")
6978 (set_attr "mode" "TI")])
6979
;; SSE2 (two-operand) low byte interleave: elements 0..7 of operand 1
;; alternate with elements 0..7 of operand 2.  Operand 1 is tied to the
;; destination.  Used by the vec_interleave_lowv16qi expander.
6980 (define_insn "sse2_punpcklbw"
6981 [(set (match_operand:V16QI 0 "register_operand" "=x")
6982 (vec_select:V16QI
6983 (vec_concat:V32QI
6984 (match_operand:V16QI 1 "register_operand" "0")
6985 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6986 (parallel [(const_int 0) (const_int 16)
6987 (const_int 1) (const_int 17)
6988 (const_int 2) (const_int 18)
6989 (const_int 3) (const_int 19)
6990 (const_int 4) (const_int 20)
6991 (const_int 5) (const_int 21)
6992 (const_int 6) (const_int 22)
6993 (const_int 7) (const_int 23)])))]
6994 "TARGET_SSE2"
6995 "punpcklbw\t{%2, %0|%0, %2}"
6996 [(set_attr "type" "sselog")
6997 (set_attr "prefix_data16" "1")
6998 (set_attr "mode" "TI")])
6999
;; AVX (VEX, three-operand) high halfword interleave: elements 4..7 of
;; operand 1 alternate with elements 4..7 of operand 2.  Operand 1 is not
;; tied to the destination.
7000 (define_insn "*avx_punpckhwd"
7001 [(set (match_operand:V8HI 0 "register_operand" "=x")
7002 (vec_select:V8HI
7003 (vec_concat:V16HI
7004 (match_operand:V8HI 1 "register_operand" "x")
7005 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
7006 (parallel [(const_int 4) (const_int 12)
7007 (const_int 5) (const_int 13)
7008 (const_int 6) (const_int 14)
7009 (const_int 7) (const_int 15)])))]
7010 "TARGET_AVX"
7011 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
7012 [(set_attr "type" "sselog")
7013 (set_attr "prefix" "vex")
7014 (set_attr "mode" "TI")])
7015
;; SSE2 (two-operand) high halfword interleave: elements 4..7 of operand 1
;; alternate with elements 4..7 of operand 2.  Operand 1 is tied to the
;; destination.  Used by the vec_interleave_highv8hi expander.
7016 (define_insn "sse2_punpckhwd"
7017 [(set (match_operand:V8HI 0 "register_operand" "=x")
7018 (vec_select:V8HI
7019 (vec_concat:V16HI
7020 (match_operand:V8HI 1 "register_operand" "0")
7021 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
7022 (parallel [(const_int 4) (const_int 12)
7023 (const_int 5) (const_int 13)
7024 (const_int 6) (const_int 14)
7025 (const_int 7) (const_int 15)])))]
7026 "TARGET_SSE2"
7027 "punpckhwd\t{%2, %0|%0, %2}"
7028 [(set_attr "type" "sselog")
7029 (set_attr "prefix_data16" "1")
7030 (set_attr "mode" "TI")])
7031
;; AVX (VEX, three-operand) low halfword interleave: elements 0..3 of
;; operand 1 alternate with elements 0..3 of operand 2.  Operand 1 is not
;; tied to the destination.
7032 (define_insn "*avx_punpcklwd"
7033 [(set (match_operand:V8HI 0 "register_operand" "=x")
7034 (vec_select:V8HI
7035 (vec_concat:V16HI
7036 (match_operand:V8HI 1 "register_operand" "x")
7037 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
7038 (parallel [(const_int 0) (const_int 8)
7039 (const_int 1) (const_int 9)
7040 (const_int 2) (const_int 10)
7041 (const_int 3) (const_int 11)])))]
7042 "TARGET_AVX"
7043 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
7044 [(set_attr "type" "sselog")
7045 (set_attr "prefix" "vex")
7046 (set_attr "mode" "TI")])
7047
;; SSE2 (two-operand) low halfword interleave: elements 0..3 of operand 1
;; alternate with elements 0..3 of operand 2.  Operand 1 is tied to the
;; destination.  Used by the vec_interleave_lowv8hi expander.
7048 (define_insn "sse2_punpcklwd"
7049 [(set (match_operand:V8HI 0 "register_operand" "=x")
7050 (vec_select:V8HI
7051 (vec_concat:V16HI
7052 (match_operand:V8HI 1 "register_operand" "0")
7053 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
7054 (parallel [(const_int 0) (const_int 8)
7055 (const_int 1) (const_int 9)
7056 (const_int 2) (const_int 10)
7057 (const_int 3) (const_int 11)])))]
7058 "TARGET_SSE2"
7059 "punpcklwd\t{%2, %0|%0, %2}"
7060 [(set_attr "type" "sselog")
7061 (set_attr "prefix_data16" "1")
7062 (set_attr "mode" "TI")])
7063
;; AVX (VEX, three-operand) high doubleword interleave: elements 2..3 of
;; operand 1 alternate with elements 2..3 of operand 2.  Operand 1 is not
;; tied to the destination.
7064 (define_insn "*avx_punpckhdq"
7065 [(set (match_operand:V4SI 0 "register_operand" "=x")
7066 (vec_select:V4SI
7067 (vec_concat:V8SI
7068 (match_operand:V4SI 1 "register_operand" "x")
7069 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7070 (parallel [(const_int 2) (const_int 6)
7071 (const_int 3) (const_int 7)])))]
7072 "TARGET_AVX"
7073 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
7074 [(set_attr "type" "sselog")
7075 (set_attr "prefix" "vex")
7076 (set_attr "mode" "TI")])
7077
;; SSE2 (two-operand) high doubleword interleave: elements 2..3 of
;; operand 1 alternate with elements 2..3 of operand 2.  Operand 1 is tied
;; to the destination.  Used by the vec_interleave_highv4si expander.
7078 (define_insn "sse2_punpckhdq"
7079 [(set (match_operand:V4SI 0 "register_operand" "=x")
7080 (vec_select:V4SI
7081 (vec_concat:V8SI
7082 (match_operand:V4SI 1 "register_operand" "0")
7083 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7084 (parallel [(const_int 2) (const_int 6)
7085 (const_int 3) (const_int 7)])))]
7086 "TARGET_SSE2"
7087 "punpckhdq\t{%2, %0|%0, %2}"
7088 [(set_attr "type" "sselog")
7089 (set_attr "prefix_data16" "1")
7090 (set_attr "mode" "TI")])
7091
;; AVX (VEX, three-operand) low doubleword interleave: elements 0..1 of
;; operand 1 alternate with elements 0..1 of operand 2.  Operand 1 is not
;; tied to the destination.
7092 (define_insn "*avx_punpckldq"
7093 [(set (match_operand:V4SI 0 "register_operand" "=x")
7094 (vec_select:V4SI
7095 (vec_concat:V8SI
7096 (match_operand:V4SI 1 "register_operand" "x")
7097 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7098 (parallel [(const_int 0) (const_int 4)
7099 (const_int 1) (const_int 5)])))]
7100 "TARGET_AVX"
7101 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
7102 [(set_attr "type" "sselog")
7103 (set_attr "prefix" "vex")
7104 (set_attr "mode" "TI")])
7105
;; SSE2 (two-operand) low doubleword interleave: elements 0..1 of
;; operand 1 alternate with elements 0..1 of operand 2.  Operand 1 is tied
;; to the destination.  Used by the vec_interleave_lowv4si expander.
7106 (define_insn "sse2_punpckldq"
7107 [(set (match_operand:V4SI 0 "register_operand" "=x")
7108 (vec_select:V4SI
7109 (vec_concat:V8SI
7110 (match_operand:V4SI 1 "register_operand" "0")
7111 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
7112 (parallel [(const_int 0) (const_int 4)
7113 (const_int 1) (const_int 5)])))]
7114 "TARGET_SSE2"
7115 "punpckldq\t{%2, %0|%0, %2}"
7116 [(set_attr "type" "sselog")
7117 (set_attr "prefix_data16" "1")
7118 (set_attr "mode" "TI")])
7119
;; AVX (VEX) scalar insert for the SSEMODE124 modes (V16QI, V8HI, V4SI).
;; Scalar operand 2 is merged into vector operand 1 at the position selected
;; by operand 3, a power-of-two vec_merge mask.  The output code converts
;; that mask to an element index with exact_log2 before printing.
;; prefix_extra is "0" for V8HI destinations and "1" otherwise.
7120 (define_insn "*avx_pinsr<ssevecsize>"
7121 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
7122 (vec_merge:SSEMODE124
7123 (vec_duplicate:SSEMODE124
7124 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
7125 (match_operand:SSEMODE124 1 "register_operand" "x")
7126 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
7127 "TARGET_AVX"
7128 {
7129 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7130 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
7131 }
7132 [(set_attr "type" "sselog")
7133 (set (attr "prefix_extra")
7134 (if_then_else (match_operand:V8HI 0 "register_operand" "")
7135 (const_string "0")
7136 (const_string "1")))
7137 (set_attr "length_immediate" "1")
7138 (set_attr "prefix" "vex")
7139 (set_attr "mode" "TI")])
7140
;; SSE4.1 byte insert: QI operand 2 is merged into V16QI operand 1 (tied to
;; the destination) at the position selected by operand 3, a power-of-two
;; vec_merge mask in the range 1..32768.  The output code converts the mask
;; to an element index with exact_log2 before printing.
7141 (define_insn "*sse4_1_pinsrb"
7142 [(set (match_operand:V16QI 0 "register_operand" "=x")
7143 (vec_merge:V16QI
7144 (vec_duplicate:V16QI
7145 (match_operand:QI 2 "nonimmediate_operand" "rm"))
7146 (match_operand:V16QI 1 "register_operand" "0")
7147 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
7148 "TARGET_SSE4_1"
7149 {
7150 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7151 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
7152 }
7153 [(set_attr "type" "sselog")
7154 (set_attr "prefix_extra" "1")
7155 (set_attr "length_immediate" "1")
7156 (set_attr "mode" "TI")])
7157
;; SSE2 halfword insert: HI operand 2 is merged into V8HI operand 1 (tied to
;; the destination) at the position selected by operand 3, a power-of-two
;; vec_merge mask in the range 1..128.  The output code converts the mask
;; to an element index with exact_log2 before printing.
7158 (define_insn "*sse2_pinsrw"
7159 [(set (match_operand:V8HI 0 "register_operand" "=x")
7160 (vec_merge:V8HI
7161 (vec_duplicate:V8HI
7162 (match_operand:HI 2 "nonimmediate_operand" "rm"))
7163 (match_operand:V8HI 1 "register_operand" "0")
7164 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
7165 "TARGET_SSE2"
7166 {
7167 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7168 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
7169 }
7170 [(set_attr "type" "sselog")
7171 (set_attr "prefix_data16" "1")
7172 (set_attr "length_immediate" "1")
7173 (set_attr "mode" "TI")])
7174
;; SSE4.1 doubleword insert: SImode operand 2 replaces the element of
;; operand 1 (tied to the destination) chosen by one-hot mask operand 3.
;; It must come before sse2_loadld since it is preferred.
(define_insn "*sse4_1_pinsrd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_merge:V4SI
          (vec_duplicate:V4SI
            (match_operand:SI 2 "nonimmediate_operand" "rm"))
          (match_operand:V4SI 1 "register_operand" "0")
          (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
  "TARGET_SSE4_1"
{
  /* The immediate is the index of the single set bit in the mask.  */
  int elt = exact_log2 (INTVAL (operands[3]));

  operands[3] = GEN_INT (elt);
  return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
7192
;; AVX quadword insert (64-bit targets only): DImode operand 2 is merged
;; into V2DI operand 1 at the position given by one-hot mask operand 3.
(define_insn "*avx_pinsrq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_merge:V2DI
          (vec_duplicate:V2DI
            (match_operand:DI 2 "nonimmediate_operand" "rm"))
          (match_operand:V2DI 1 "register_operand" "x")
          (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
  "TARGET_AVX && TARGET_64BIT"
{
  /* The immediate is the index of the single set bit in the mask.  */
  int elt = exact_log2 (INTVAL (operands[3]));

  operands[3] = GEN_INT (elt);
  return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
7210
;; SSE4.1 quadword insert (64-bit targets only): DImode operand 2
;; replaces the element of operand 1 (tied to the destination) chosen by
;; one-hot mask operand 3.
(define_insn "*sse4_1_pinsrq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_merge:V2DI
          (vec_duplicate:V2DI
            (match_operand:DI 2 "nonimmediate_operand" "rm"))
          (match_operand:V2DI 1 "register_operand" "0")
          (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
  "TARGET_SSE4_1 && TARGET_64BIT"
{
  /* The immediate is the index of the single set bit in the mask.  */
  int elt = exact_log2 (INTVAL (operands[3]));

  operands[3] = GEN_INT (elt);
  return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "prefix_rex" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
7228
;; Select one QImode element of V16QI operand 1 (constant selector in
;; operand 2) and zero-extend it into an SImode general register.
(define_insn "*sse4_1_pextrb"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (zero_extend:SI
          (vec_select:QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel
              [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
  "TARGET_SSE4_1"
  "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
7242
;; Memory-destination variant: store one QImode element of V16QI
;; operand 1 (constant selector in operand 2) without extension.
(define_insn "*sse4_1_pextrb_memory"
  [(set (match_operand:QI 0 "memory_operand" "=m")
        (vec_select:QI
          (match_operand:V16QI 1 "register_operand" "x")
          (parallel
            [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
  "TARGET_SSE4_1"
  "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
7255
;; Select one HImode element of V8HI operand 1 (constant selector in
;; operand 2) and zero-extend it into an SImode general register.
(define_insn "*sse2_pextrw"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (zero_extend:SI
          (vec_select:HI
            (match_operand:V8HI 1 "register_operand" "x")
            (parallel
              [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
  "TARGET_SSE2"
  "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_data16" "1")
   (set_attr "length_immediate" "1")])
7269
;; Memory-destination variant (requires SSE4.1): store one HImode
;; element of V8HI operand 1, selected by constant operand 2.
(define_insn "*sse4_1_pextrw_memory"
  [(set (match_operand:HI 0 "memory_operand" "=m")
        (vec_select:HI
          (match_operand:V8HI 1 "register_operand" "x")
          (parallel
            [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
  "TARGET_SSE4_1"
  "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
7282
;; Extract one SImode element of V4SI operand 1 (constant selector in
;; operand 2) into a general register or memory.
(define_insn "*sse4_1_pextrd"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
        (vec_select:SI
          (match_operand:V4SI 1 "register_operand" "x")
          (parallel
            [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
  "TARGET_SSE4_1"
  "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
7295
;; Extract one DImode element of V2DI operand 1 (constant selector in
;; operand 2) into a general register or memory; 64-bit targets only.
;; It must come before *vec_extractv2di_1_sse since it is preferred.
(define_insn "*sse4_1_pextrq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "x")
          (parallel
            [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
  "TARGET_SSE4_1 && TARGET_64BIT"
  "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_rex" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
7310
;; Expander taking the shuffle control as a single constant (operand 2).
;; The constant is split into four 2-bit selectors, lowest bits first,
;; which are handed to sse2_pshufd_1.
(define_expand "sse2_pshufd"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V4SI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* Selector i lives in bits [2*i+1 : 2*i] of the control value.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT ((imm >> (2 * i)) & 3);

  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
                                sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
7325
;; V4SI permutation of operand 1 with four explicit element selectors
;; (operands 2..5).  At output time the selectors are packed back into
;; one immediate, two bits each, operand 2 in the low bits.
(define_insn "sse2_pshufd_1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  int imm = (INTVAL (operands[2])
             | (INTVAL (operands[3]) << 2)
             | (INTVAL (operands[4]) << 4)
             | (INTVAL (operands[5]) << 6));

  /* Operand 2 is reused to carry the packed immediate.  */
  operands[2] = GEN_INT (imm);
  return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_data16" "1")
   (set_attr "length_immediate" "1")])
7350
;; Expander taking the shuffle control as a single constant (operand 2).
;; The constant is split into four 2-bit selectors, lowest bits first,
;; which are handed to sse2_pshuflw_1.
(define_expand "sse2_pshuflw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* Selector i lives in bits [2*i+1 : 2*i] of the control value.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT ((imm >> (2 * i)) & 3);

  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
                                 sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
7365
;; V8HI permutation that reorders result elements 0..3 using selectors
;; in operands 2..5 (each 0..3) and passes elements 4..7 through
;; unchanged.  The selectors are packed into one immediate at output.
(define_insn "sse2_pshuflw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")
                     (const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))]
  "TARGET_SSE2"
{
  int imm = 0;
  int i;

  /* Two bits per selector, operand 2 in the low bits.  */
  for (i = 0; i < 4; i++)
    imm |= INTVAL (operands[2 + i]) << (2 * i);

  /* Operand 2 is reused to carry the packed immediate.  */
  operands[2] = GEN_INT (imm);
  return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "length_immediate" "1")])
7395
;; Expander taking the shuffle control as a single constant (operand 2).
;; Each 2-bit field is biased by 4 so that the selectors handed to
;; sse2_pshufhw_1 fall in the range 4..7.
(define_expand "sse2_pshufhw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* Selector i lives in bits [2*i+1 : 2*i] of the control value.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT (((imm >> (2 * i)) & 3) + 4);

  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
                                 sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
7410
;; V8HI permutation that passes elements 0..3 through unchanged and
;; reorders result elements 4..7 using selectors in operands 2..5
;; (each 4..7).  The bias of 4 is removed when packing the immediate.
(define_insn "sse2_pshufhw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 2)
                     (const_int 3)
                     (match_operand 2 "const_4_to_7_operand" "")
                     (match_operand 3 "const_4_to_7_operand" "")
                     (match_operand 4 "const_4_to_7_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")])))]
  "TARGET_SSE2"
{
  int imm = 0;
  int i;

  /* Two bits per selector, operand 2 in the low bits.  */
  for (i = 0; i < 4; i++)
    imm |= (INTVAL (operands[2 + i]) - 4) << (2 * i);

  /* Operand 2 is reused to carry the packed immediate.  */
  operands[2] = GEN_INT (imm);
  return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "length_immediate" "1")])
7440
;; Build a V4SI from SImode operand 1: vec_merge with mask (const_int 1)
;; of the duplicated scalar against operand 2, which the preparation
;; code sets to the all-zero V4SI constant.
(define_expand "sse2_loadd"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_merge:V4SI
          (vec_duplicate:V4SI
            (match_operand:SI 1 "nonimmediate_operand" ""))
          (match_dup 2)
          (const_int 1)))]
  "TARGET_SSE"
{
  operands[2] = CONST0_RTX (V4SImode);
})
7450
;; AVX: merge SImode operand 2 into element 0 (mask const_int 1) of
;; operand 1, which is either zero ("C") or a register.
;;   alt 0: memory source, zero background  -> vmovd
;;   alt 1: general register, zero background -> vmovd
;;   alt 2: SSE register source and background -> vmovss
(define_insn "*avx_loadld"
  [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
        (vec_merge:V4SI
          (vec_duplicate:V4SI
            (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
          (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
          (const_int 1)))]
  "TARGET_AVX"
  "@
   vmovd\t{%2, %0|%0, %2}
   vmovd\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "TI,TI,V4SF")
   (set_attr "prefix" "vex")])
7466
;; Merge SImode operand 2 into element 0 (mask const_int 1) of
;; operand 1, which is either zero ("C") or tied to the destination.
;;   alt 0/1: movd from memory / general register, zero background
;;   alt 2:   movss from memory, zero background
;;   alt 3:   movss from an SSE register into the tied destination
(define_insn "sse2_loadld"
  [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
        (vec_merge:V4SI
          (vec_duplicate:V4SI
            (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
          (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   movd\t{%2, %0|%0, %2}
   movd\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}"
  [(set_attr "mode" "TI,TI,V4SF,SF")
   (set_attr "type" "ssemov")])
7482
;; Extract element 0 of V4SI operand 1 as an SImode value.  Always
;; emitted as "#"; after reload it is split into a plain SImode move
;; from the same hard register, provided inter-unit moves are allowed
;; or the destination is memory or not a general register.
(define_insn_and_split "sse2_stored"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
        (vec_select:SI
          (match_operand:V4SI 1 "register_operand" "x,Yi")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "#"
  "&& reload_completed
   && (TARGET_INTER_UNIT_MOVES
       || MEM_P (operands [0])
       || !GENERAL_REGNO_P (true_regnum (operands [0])))"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Re-express the source register in SImode.  */
  unsigned int src_regno = REGNO (operands[1]);

  operands[1] = gen_rtx_REG (SImode, src_regno);
})
7498
;; Extract element operand 2 of a V4SI in offsettable memory into a
;; general register.  After reload this becomes an SImode load from the
;; vector's address plus 4 * element index.
(define_insn_and_split "*vec_ext_v4si_mem"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (vec_select:SI
          (match_operand:V4SI 1 "memory_operand" "o")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
{
  int elt = INTVAL (operands[2]);
  rtx elt_mem = adjust_address (operands[1], SImode, elt * 4);

  emit_move_insn (operands[0], elt_mem);
  DONE;
})
7514
;; Named expander for extracting element 0 of V2DI operand 1 as DImode.
;; There is no preparation code; the RTL template is emitted as is.
(define_expand "sse_storeq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "")
7522
;; 64-bit: extract element 0 of V2DI operand 1 as DImode.
;;   alt 0/1: register source; emitted as "#" so that a define_split
;;            can rewrite it.
;;   alt 2:   offsettable memory source loaded into a general register.
;; Alternative 2 is an integer move (type "imov"), so it must use the
;; plain mov{q} mnemonic: with the "%v" prefix an AVX build would print
;; "vmovq mem, reg64" (or "vmov" in Intel dialect), for which no
;; encoding exists.  Its prefix attribute is "orig" accordingly.
(define_insn "*sse2_storeq_rex64"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
          (parallel [(const_int 0)])))]
  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   #
   #
   mov{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "*,*,imov")
   (set_attr "prefix" "*,*,orig")
   (set_attr "mode" "*,*,DI")])
7536
;; Extract element 0 of V2DI register operand 1 as DImode into memory or
;; an SSE register.  Always emitted as "#", i.e. it relies on being
;; split before final output.
(define_insn "*sse2_storeq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "x")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "#")
7544
;; After reload, rewrite "element 0 of a V2DI register" as a plain
;; DImode move from the same hard register.  Applies when inter-unit
;; moves are allowed, or the destination is memory or not a general
;; register.
(define_split
  [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE
   && reload_completed
   && (TARGET_INTER_UNIT_MOVES
       || MEM_P (operands [0])
       || !GENERAL_REGNO_P (true_regnum (operands [0])))"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Re-express the source register in DImode.  */
  unsigned int src_regno = REGNO (operands[1]);

  operands[1] = gen_rtx_REG (DImode, src_regno);
})
7559
;; 64-bit AVX: extract element 1 of V2DI operand 1 as DImode.
;;   alt 0: register -> memory          vmovhps
;;   alt 1: register -> SSE register    vpsrldq by 8 bytes
;;   alt 2: memory   -> SSE register    vmovq of %H1
;;   alt 3: memory   -> general register, plain integer mov{q} of %H1
;; Alternative 3 is an integer move (type "imov"); it uses the same
;; un-prefixed mov{q} template as *vec_extractv2di_1_rex64, because
;; "vmovq mem, reg64" has no encoding and "vmov" is not a mnemonic in
;; Intel dialect.  Its prefix attribute is therefore "orig".
(define_insn "*vec_extractv2di_1_rex64_avx"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
          (parallel [(const_int 1)])))]
  "TARGET_64BIT
   && TARGET_AVX
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   vmovhps\t{%1, %0|%0, %1}
   vpsrldq\t{$8, %1, %0|%0, %1, 8}
   vmovq\t{%H1, %0|%0, %H1}
   mov{q}\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov,sseishft,ssemov,imov")
   (set_attr "length_immediate" "*,1,*,*")
   (set_attr "memory" "*,none,*,*")
   (set_attr "prefix" "vex,vex,vex,orig")
   (set_attr "mode" "V2SF,TI,TI,DI")])
7578
;; 64-bit, non-VEX: extract element 1 of V2DI operand 1 as DImode.
;;   alt 0: register -> memory              movhps
;;   alt 1: in place in the tied register   psrldq by 8 bytes
;;   alt 2: memory   -> SSE register        movq of %H1
;;   alt 3: memory   -> general register    mov{q} of %H1
(define_insn "*vec_extractv2di_1_rex64"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
          (parallel [(const_int 1)])))]
  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   psrldq\t{$8, %0|%0, 8}
   movq\t{%H1, %0|%0, %H1}
   mov{q}\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov,sseishft,ssemov,imov")
   (set_attr "mode" "V2SF,TI,TI,DI")
   (set_attr "atom_unit" "*,sishuf,*,*")
   (set_attr "memory" "*,none,*,*")
   (set_attr "length_immediate" "*,1,*,*")])
7595
;; 32-bit AVX: extract element 1 of V2DI operand 1 as DImode.
;;   alt 0: register -> memory          vmovhps
;;   alt 1: register -> SSE register    vpsrldq by 8 bytes
;;   alt 2: memory   -> SSE register    vmovq of %H1
(define_insn "*vec_extractv2di_1_avx"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 1)])))]
  "!TARGET_64BIT
   && TARGET_AVX
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   vmovhps\t{%1, %0|%0, %1}
   vpsrldq\t{$8, %1, %0|%0, %1, 8}
   vmovq\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov,sseishft,ssemov")
   (set_attr "mode" "V2SF,TI,TI")
   (set_attr "prefix" "vex")
   (set_attr "memory" "*,none,*")
   (set_attr "length_immediate" "*,1,*")])
7613
;; 32-bit SSE2: extract element 1 of V2DI operand 1 as DImode.
;;   alt 0: register -> memory              movhps
;;   alt 1: in place in the tied register   psrldq by 8 bytes
;;   alt 2: memory   -> SSE register        movq of %H1
(define_insn "*vec_extractv2di_1_sse2"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
          (parallel [(const_int 1)])))]
  "!TARGET_64BIT
   && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   psrldq\t{$8, %0|%0, 8}
   movq\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov,sseishft,ssemov")
   (set_attr "mode" "V2SF,TI,TI")
   (set_attr "atom_unit" "*,sishuf,*")
   (set_attr "memory" "*,none,*")
   (set_attr "length_immediate" "*,1,*")])
7630
;; SSE without SSE2: extract element 1 of V2DI operand 1 as DImode.
;;   alt 0: register -> memory          movhps
;;   alt 1: register -> SSE register    movhlps
;;   alt 2: memory   -> SSE register    movlps of %H1
;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
(define_insn "*vec_extractv2di_1_sse"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 1)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "mode" "V2SF,V4SF,V2SF")
   (set_attr "type" "ssemov")])
7645
;; Broadcast SImode register operand 1 to all four V4SI elements.
;;   alt 0: (v)pshufd with immediate 0
;;   alt 1: shufps with immediate 0 on the tied register (never VEX)
(define_insn "*vec_dupv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
        (vec_duplicate:V4SI
          (match_operand:SI 1 "register_operand" " Y2,0")))]
  "TARGET_SSE"
  "@
   %vpshufd\t{$0, %1, %0|%0, %1, 0}
   shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI,V4SF")
   (set_attr "prefix" "maybe_vex,orig")
   (set_attr "length_immediate" "1")])
7658
;; AVX: broadcast DImode register operand 1 to both V2DI elements by
;; using it as both sources of vpunpcklqdq.
(define_insn "*vec_dupv2di_avx"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "register_operand" "x")))]
  "TARGET_AVX"
  "vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")
   (set_attr "prefix" "vex")])
7668
;; Broadcast DImode operand 1 (tied to the destination) to both V2DI
;; elements.  Both templates name operand 0 twice, so they read the same
;; in either assembler dialect.
;;   alt 0: punpcklqdq
;;   alt 1: movlhps
(define_insn "*vec_dupv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "register_operand" " 0 ,0")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t%0, %0
   movlhps\t%0, %0"
  [(set_attr "mode" "TI,V4SF")
   (set_attr "type" "sselog1,ssemov")])
7679
;; AVX: concatenate two SImode values into a V2SI.
;;   alt 0: vpinsrd  - operand 2 (reg/mem) inserted at index 1
;;   alt 1: vpunpckldq of two SSE registers
;;   alt 2: vmovd    - operand 2 is zero ("C")
;;   alt 3: punpckldq on MMX registers (not VEX encoded)
;;   alt 4: movd into an MMX register, operand 2 zero (not VEX encoded)
(define_insn "*vec_concatv2si_avx"
  [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
          (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
  "TARGET_AVX"
  "@
   vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
   vpunpckldq\t{%2, %1, %0|%0, %1, %2}
   vmovd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "TI,TI,TI,DI,DI")
   ;; The two MMX alternatives keep the legacy encoding.
   (set_attr "prefix" "vex,vex,vex,orig,orig")
   (set_attr "prefix_extra" "1,*,*,*,*")
   (set_attr "length_immediate" "1,*,*,*,*")])
7700
;; SSE4.1: concatenate two SImode values into a V2SI.
;;   alt 0: pinsrd   - operand 2 (reg/mem) inserted at index 1
;;   alt 1: punpckldq of two SSE registers
;;   alt 2: movd     - operand 2 is zero ("C")
;;   alt 3: punpckldq on MMX registers
;;   alt 4: movd into an MMX register, operand 2 zero
(define_insn "*vec_concatv2si_sse4_1"
  [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
          (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
  "TARGET_SSE4_1"
  "@
   pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "TI,TI,TI,DI,DI")
   (set_attr "prefix_extra" "1,*,*,*,*")
   (set_attr "length_immediate" "1,*,*,*,*")])
7717
;; SSE2: concatenate two SImode values into a V2SI.
;;   alt 0: punpckldq of two SSE registers
;;   alt 1: movd, operand 2 zero ("C")
;;   alt 2: punpckldq on MMX registers
;;   alt 3: movd into an MMX register, operand 2 zero
;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
(define_insn "*vec_concatv2si_sse2"
  [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
          (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
  "TARGET_SSE2"
  "@
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "mode" "TI,TI,DI,DI")
   (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")])
7734
;; SSE: concatenate two SImode values into a V2SI.
;;   alt 0: unpcklps of two SSE registers
;;   alt 1: movss from memory, operand 2 zero ("C")
;;   alt 2: punpckldq on MMX registers
;;   alt 3: movd into an MMX register, operand 2 zero
(define_insn "*vec_concatv2si_sse"
  [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
          (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "mode" "V4SF,V4SF,DI,DI")
   (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")])
7748
;; AVX: concatenate two V2SI values into a V4SI.
;;   alt 0: vpunpcklqdq of two SSE registers
;;   alt 1: vmovhps with operand 2 in memory
(define_insn "*vec_concatv4si_1_avx"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_concat:V4SI
          (match_operand:V2SI 1 "register_operand" " x,x")
          (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
  "TARGET_AVX"
  "@
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog,ssemov")
   (set_attr "mode" "TI,V2SF")
   (set_attr "prefix" "vex")])
7761
;; Concatenate two V2SI values into a V4SI; operand 1 is tied to the
;; destination in every alternative.
;;   alt 0: punpcklqdq
;;   alt 1: movlhps, operand 2 in an SSE register
;;   alt 2: movhps, operand 2 in memory
(define_insn "*vec_concatv4si_1"
  [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
        (vec_concat:V4SI
          (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
          (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "mode" "TI,V4SF,V2SF")
   (set_attr "type" "sselog,ssemov,ssemov")])
7774
;; 32-bit AVX: concatenate two DImode values into a V2DI.
;;   alt 0: vmovq from memory, operand 2 zero ("C")
;;   alt 1: movq2dq from an MMX register, operand 2 zero (not VEX)
;;   alt 2: vpunpcklqdq of two SSE registers
;;   alt 3: vmovhps with operand 2 in memory
(define_insn "*vec_concatv2di_avx"
  [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
        (vec_concat:V2DI
          (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
          (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
  "!TARGET_64BIT && TARGET_AVX"
  "@
   vmovq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
   (set_attr "mode" "TI,TI,TI,V2SF")
   ;; Only the movq2dq alternative keeps the legacy encoding.
   (set_attr "prefix" "vex,orig,vex,vex")])
7792
;; 32-bit SSE: concatenate two DImode values into a V2DI.
;;   alt 0: movq, operand 2 zero ("C")
;;   alt 1: movq2dq from an MMX register, operand 2 zero
;;   alt 2: punpcklqdq, operand 1 tied to the destination
;;   alt 3: movlhps, operand 1 tied, operand 2 in an SSE register
;;   alt 4: movhps, operand 1 tied, operand 2 in memory
(define_insn "vec_concatv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
        (vec_concat:V2DI
          (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
          (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
  "!TARGET_64BIT && TARGET_SSE"
  "@
   movq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "mode" "TI,TI,TI,V4SF,V2SF")
   (set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")])
7807
;; 64-bit AVX: concatenate two DImode values into a V2DI.
;;   alt 0: vpinsrq  - operand 2 (reg/mem) inserted at index 1
;;   alt 1: vmovq from memory, operand 2 zero ("C")
;;   alt 2: vmovq from a general register, operand 2 zero
;;   alt 3: movq2dq from an MMX register, operand 2 zero (not VEX)
;;   alt 4: vpunpcklqdq of two SSE registers
;;   alt 5: vmovhps with operand 2 in memory
(define_insn "*vec_concatv2di_rex64_avx"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
        (vec_concat:V2DI
          (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
          (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
  "TARGET_64BIT && TARGET_AVX"
  "@
   vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
   vmovq\t{%1, %0|%0, %1}
   vmovq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
   (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")
   ;; Only the movq2dq alternative keeps the legacy encoding.
   (set_attr "prefix" "vex,vex,vex,orig,vex,vex")
   (set_attr "prefix_extra" "1,*,*,*,*,*")
   (set_attr "length_immediate" "1,*,*,*,*,*")])
7829
;; 64-bit SSE4.1: concatenate two DImode values into a V2DI.
;;   alt 0: pinsrq   - operand 2 (reg/mem) inserted at index 1
;;   alt 1: movq from memory or SSE register, operand 2 zero ("C")
;;   alt 2: movq from a general register, operand 2 zero
;;   alt 3: movq2dq from an MMX register, operand 2 zero
;;   alt 4: punpcklqdq, operand 1 tied to the destination
;;   alt 5: movlhps, operand 1 tied
;;   alt 6: movhps, operand 1 tied, operand 2 in memory
(define_insn "*vec_concatv2di_rex64_sse4_1"
  [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
        (vec_concat:V2DI
          (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
          (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
  "TARGET_64BIT && TARGET_SSE4_1"
  "@
   pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
   movq\t{%1, %0|%0, %1}
   movq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
   (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")
   (set_attr "prefix_rex" "1,*,1,*,*,*,*")
   (set_attr "prefix_extra" "1,*,*,*,*,*,*")
   (set_attr "length_immediate" "1,*,*,*,*,*,*")])
7849
;; 64-bit SSE: concatenate two DImode values into a V2DI.
;;   alt 0: movq from memory or SSE register, operand 2 zero ("C")
;;   alt 1: movq from a general register, operand 2 zero
;;   alt 2: movq2dq from an MMX register, operand 2 zero
;;   alt 3: punpcklqdq, operand 1 tied to the destination
;;   alt 4: movlhps, operand 1 tied
;;   alt 5: movhps, operand 1 tied, operand 2 in memory
(define_insn "*vec_concatv2di_rex64_sse"
  [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
        (vec_concat:V2DI
          (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
          (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
  "TARGET_64BIT && TARGET_SSE"
  "@
   movq\t{%1, %0|%0, %1}
   movq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
   (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")
   (set_attr "prefix_rex" "*,1,*,*,*,*")])
7866
;; V16QI -> V8HI unpack, "u"/"hi" variant.  All work is delegated to an
;; i386 helper, the SSE4.1 one when that ISA is enabled; the helper gets
;; the flag pair (true, true).
;; NOTE(review): the flags presumably mean (unsigned, high half) --
;; confirm against the helper definitions.
(define_expand "vec_unpacku_hi_v16qi"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    ix86_expand_sse_unpack (operands, true, true);
  else
    ix86_expand_sse4_unpack (operands, true, true);
  DONE;
})
7878
;; V16QI -> V8HI unpack, "s"/"hi" variant.  All work is delegated to an
;; i386 helper, the SSE4.1 one when that ISA is enabled; the helper gets
;; the flag pair (false, true).
;; NOTE(review): the flags presumably mean (unsigned, high half) --
;; confirm against the helper definitions.
(define_expand "vec_unpacks_hi_v16qi"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    ix86_expand_sse_unpack (operands, false, true);
  else
    ix86_expand_sse4_unpack (operands, false, true);
  DONE;
})
7890
;; V16QI -> V8HI unpack, "u"/"lo" variant.  All work is delegated to an
;; i386 helper, the SSE4.1 one when that ISA is enabled; the helper gets
;; the flag pair (true, false).
;; NOTE(review): the flags presumably mean (unsigned, high half) --
;; confirm against the helper definitions.
(define_expand "vec_unpacku_lo_v16qi"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    ix86_expand_sse_unpack (operands, true, false);
  else
    ix86_expand_sse4_unpack (operands, true, false);
  DONE;
})
7902
;; V16QI -> V8HI unpack, "s"/"lo" variant.  All work is delegated to an
;; i386 helper, the SSE4.1 one when that ISA is enabled; the helper gets
;; the flag pair (false, false).
;; NOTE(review): the flags presumably mean (unsigned, high half) --
;; confirm against the helper definitions.
(define_expand "vec_unpacks_lo_v16qi"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    ix86_expand_sse_unpack (operands, false, false);
  else
    ix86_expand_sse4_unpack (operands, false, false);
  DONE;
})
7914
;; V8HI -> V4SI unpack, "u"/"hi" variant.  All work is delegated to an
;; i386 helper, the SSE4.1 one when that ISA is enabled; the helper gets
;; the flag pair (true, true).
;; NOTE(review): the flags presumably mean (unsigned, high half) --
;; confirm against the helper definitions.
(define_expand "vec_unpacku_hi_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    ix86_expand_sse_unpack (operands, true, true);
  else
    ix86_expand_sse4_unpack (operands, true, true);
  DONE;
})
7926
;; V8HI -> V4SI unpack, "s"/"hi" variant.  All work is delegated to an
;; i386 helper, the SSE4.1 one when that ISA is enabled; the helper gets
;; the flag pair (false, true).
;; NOTE(review): the flags presumably mean (unsigned, high half) --
;; confirm against the helper definitions.
(define_expand "vec_unpacks_hi_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    ix86_expand_sse_unpack (operands, false, true);
  else
    ix86_expand_sse4_unpack (operands, false, true);
  DONE;
})
7938
;; V8HI -> V4SI unpack, "u"/"lo" variant.  All work is delegated to an
;; i386 helper, the SSE4.1 one when that ISA is enabled; the helper gets
;; the flag pair (true, false).
;; NOTE(review): the flags presumably mean (unsigned, high half) --
;; confirm against the helper definitions.
(define_expand "vec_unpacku_lo_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    ix86_expand_sse_unpack (operands, true, false);
  else
    ix86_expand_sse4_unpack (operands, true, false);
  DONE;
})
7950
;; V8HI -> V4SI unpack, "s"/"lo" variant.  All work is delegated to an
;; i386 helper, the SSE4.1 one when that ISA is enabled; the helper gets
;; the flag pair (false, false).
;; NOTE(review): the flags presumably mean (unsigned, high half) --
;; confirm against the helper definitions.
(define_expand "vec_unpacks_lo_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    ix86_expand_sse_unpack (operands, false, false);
  else
    ix86_expand_sse4_unpack (operands, false, false);
  DONE;
})
7962
;; V4SI -> V2DI unpack, "u"/"hi" variant.  All work is delegated to an
;; i386 helper, the SSE4.1 one when that ISA is enabled; the helper gets
;; the flag pair (true, true).
;; NOTE(review): the flags presumably mean (unsigned, high half) --
;; confirm against the helper definitions.
(define_expand "vec_unpacku_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    ix86_expand_sse_unpack (operands, true, true);
  else
    ix86_expand_sse4_unpack (operands, true, true);
  DONE;
})
7974
;; V4SI -> V2DI unpack, "s"/"hi" variant.  All work is delegated to an
;; i386 helper, the SSE4.1 one when that ISA is enabled; the helper gets
;; the flag pair (false, true).
;; NOTE(review): the flags presumably mean (unsigned, high half) --
;; confirm against the helper definitions.
(define_expand "vec_unpacks_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    ix86_expand_sse_unpack (operands, false, true);
  else
    ix86_expand_sse4_unpack (operands, false, true);
  DONE;
})
7986
;; V4SI -> V2DI unpack, "u"/"lo" variant.  All work is delegated to an
;; i386 helper, the SSE4.1 one when that ISA is enabled; the helper gets
;; the flag pair (true, false).
;; NOTE(review): the flags presumably mean (unsigned, high half) --
;; confirm against the helper definitions.
(define_expand "vec_unpacku_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    ix86_expand_sse_unpack (operands, true, false);
  else
    ix86_expand_sse4_unpack (operands, true, false);
  DONE;
})
7998
;; V4SI -> V2DI unpack, "s"/"lo" variant.  All work is delegated to an
;; i386 helper, the SSE4.1 one when that ISA is enabled; the helper gets
;; the flag pair (false, false).
;; NOTE(review): the flags presumably mean (unsigned, high half) --
;; confirm against the helper definitions.
(define_expand "vec_unpacks_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    ix86_expand_sse_unpack (operands, false, false);
  else
    ix86_expand_sse4_unpack (operands, false, false);
  DONE;
})
8010
8011 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8012 ;;
8013 ;; Miscellaneous
8014 ;;
8015 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8016
;; Unsigned byte average, written out as
;;   truncate ((zext (op1) + zext (op2) + 1) >> 1)
;; in the widened V16HI mode.  The preparation code hands the operands
;; to ix86_fixup_binary_operands_no_copy for PLUS.
;; NOTE(review): the vector of ones is spelled const_vector:V16QI even
;; though it is added in V16HI; the two insns matching this RTL use the
;; same spelling, so all three would have to change together.
(define_expand "sse2_uavgv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (truncate:V16QI
          (lshiftrt:V16HI
            (plus:V16HI
              (plus:V16HI
                (zero_extend:V16HI
                  (match_operand:V16QI 1 "nonimmediate_operand" ""))
                (zero_extend:V16HI
                  (match_operand:V16QI 2 "nonimmediate_operand" "")))
              (const_vector:V16QI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);
})
8038
;; AVX unsigned byte average (vpavgb), three-operand form.  The RTL is
;;   truncate ((zext (op1) + zext (op2) + 1) >> 1)
;; with operands 1 and 2 marked commutative ("%").
(define_insn "*avx_uavgv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (truncate:V16QI
          (lshiftrt:V16HI
            (plus:V16HI
              (plus:V16HI
                (zero_extend:V16HI
                  (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
                (zero_extend:V16HI
                  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
              (const_vector:V16QI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
  "vpavgb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")
   (set_attr "prefix" "vex")])
8063
;; SSE2 unsigned byte average (pavgb), destructive two-operand form:
;; operand 1 is tied to the destination.  The RTL is
;;   truncate ((zext (op1) + zext (op2) + 1) >> 1)
;; with operands 1 and 2 marked commutative ("%").
(define_insn "*sse2_uavgv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (truncate:V16QI
          (lshiftrt:V16HI
            (plus:V16HI
              (plus:V16HI
                (zero_extend:V16HI
                  (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
                (zero_extend:V16HI
                  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
              (const_vector:V16QI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
  "pavgb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")
   (set_attr "prefix_data16" "1")])
8088
;; Unsigned word average, written out as
;;   truncate ((zext (op1) + zext (op2) + 1) >> 1)
;; in the widened V8SI mode.  The preparation code hands the operands
;; to ix86_fixup_binary_operands_no_copy for PLUS.
;; NOTE(review): the vector of ones is spelled const_vector:V8HI even
;; though it is added in V8SI; the two insns matching this RTL use the
;; same spelling, so all three would have to change together.
(define_expand "sse2_uavgv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (plus:V8SI
                (zero_extend:V8SI
                  (match_operand:V8HI 1 "nonimmediate_operand" ""))
                (zero_extend:V8SI
                  (match_operand:V8HI 2 "nonimmediate_operand" "")))
              (const_vector:V8HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);
})
8106
;; AVX unsigned word average (vpavgw), three-operand form.  The RTL is
;;   truncate ((zext (op1) + zext (op2) + 1) >> 1)
;; with operands 1 and 2 marked commutative ("%").
(define_insn "*avx_uavgv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (plus:V8SI
                (zero_extend:V8SI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
                (zero_extend:V8SI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
              (const_vector:V8HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
  "vpavgw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")
   (set_attr "prefix" "vex")])
8127
;; SSE2 two-operand unsigned rounding average of eight 16-bit elements.
;; Operand 1 is tied to the destination ("0"); "%" marks operands 1 and 2
;; as commutative.
(define_insn "*sse2_uavgv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (plus:V8SI
                (zero_extend:V8SI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
                (zero_extend:V8SI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
              ;; Rounding term: a vector of eight ones.
              (const_vector:V8HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
  "pavgw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
8148
;; An exact RTL description of psadbw would be enormous and is unlikely
;; to be useful in general, so the operation is modelled as an opaque
;; UNSPEC_PSADBW of its two V16QI inputs producing a V2DI result.
;; This is the VEX-encoded three-operand form.
(define_insn "*avx_psadbw"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V16QI 1 "register_operand" "x")
           (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
          UNSPEC_PSADBW))]
  "TARGET_AVX"
  "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8161
;; SSE2 two-operand psadbw, modelled as UNSPEC_PSADBW of two V16QI inputs
;; with a V2DI result.  Operand 1 is tied to the destination.
(define_insn "sse2_psadbw"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V16QI 1 "register_operand" "0")
           (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
          UNSPEC_PSADBW))]
  "TARGET_SSE2"
  "psadbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
8173
;; 256-bit AVX vmovmskps/vmovmskpd: UNSPEC_MOVMSK of a 256-bit float
;; vector register, delivered into an SImode general register.
;; The insn is classified as "ssemov", the same type the 128-bit
;; <sse>_movmskp<ssemodesuffixf2c> and sse2_pmovmskb patterns use for
;; the equivalent mask extraction.
(define_insn "avx_movmskp<avxmodesuffixf2c>256"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
          [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
          UNSPEC_MOVMSK))]
  "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
  "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
8184
;; 128-bit movmskps/movmskpd: UNSPEC_MOVMSK of a float vector register,
;; delivered into an SImode general register.  The "%v" in the template
;; together with prefix "maybe_vex" lets one pattern serve both the
;; legacy and the VEX encoding.
(define_insn "<sse>_movmskp<ssemodesuffixf2c>"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
          [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
          UNSPEC_MOVMSK))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
  "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
8195
;; pmovmskb: UNSPEC_MOVMSK of a V16QI register, delivered into an SImode
;; general register.  Emitted with or without the VEX prefix ("%v",
;; prefix "maybe_vex").
(define_insn "sse2_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
          [(match_operand:V16QI 1 "register_operand" "x")]
          UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "%vpmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])
8206
;; Expander for the byte-masked store.  The destination memory also
;; appears as an input of the UNSPEC_MASKMOV (match_dup 0), alongside the
;; two V16QI register operands.  No preparation code is needed.
(define_expand "sse2_maskmovdqu"
  [(set (match_operand:V16QI 0 "memory_operand" "")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "")
           (match_operand:V16QI 2 "register_operand" "")
           (match_dup 0)]
          UNSPEC_MASKMOV))]
  "TARGET_SSE2"
  "")
8215
;; 32-bit maskmovdqu.  The store address is an SImode register restricted
;; to constraint "D"; it is implicit in the instruction and therefore
;; absent from the output template.
(define_insn "*sse2_maskmovdqu"
  [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x")
           (match_operand:V16QI 2 "register_operand" "x")
           (mem:V16QI (match_dup 0))]
          UNSPEC_MASKMOV))]
  "TARGET_SSE2 && !TARGET_64BIT"
  ;; @@@ TODO: the operand order for Intel vs. AT&T syntax still needs
  ;; to be checked.
  "%vmaskmovdqu\t{%2, %1|%1, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "1")
   ;; The default length_vex computation is confused by the implicit
   ;; %rdi operand, so the value is given explicitly.
   (set_attr "length_vex" "3")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
8231
;; 64-bit maskmovdqu.  Same shape as the 32-bit pattern, but the implicit
;; address register (constraint "D") is DImode.
(define_insn "*sse2_maskmovdqu_rex64"
  [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x")
           (match_operand:V16QI 2 "register_operand" "x")
           (mem:V16QI (match_dup 0))]
          UNSPEC_MASKMOV))]
  "TARGET_SSE2 && TARGET_64BIT"
  ;; @@@ TODO: the operand order for Intel vs. AT&T syntax still needs
  ;; to be checked.
  "%vmaskmovdqu\t{%2, %1|%1, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "1")
   ;; The default length_vex computation is confused by the implicit
   ;; %rdi operand, so it is computed here from operand 2's register
   ;; number: 3 + 1 when it is a REX SSE register, 2 + 1 otherwise.
   (set (attr "length_vex")
        (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
8248
;; ldmxcsr: volatile unspec (UNSPECV_LDMXCSR) that consumes an SImode
;; memory operand; the insn is marked as a memory load.
(define_insn "sse_ldmxcsr"
  [(unspec_volatile
     [(match_operand:SI 0 "memory_operand" "m")]
     UNSPECV_LDMXCSR)]
  "TARGET_SSE"
  "%vldmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "mxcsr")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "load")])
8258
;; stmxcsr: writes the result of a volatile unspec (UNSPECV_STMXCSR) to
;; an SImode memory operand; the insn is marked as a memory store.
(define_insn "sse_stmxcsr"
  [(set (match_operand:SI 0 "memory_operand" "=m")
        (unspec_volatile:SI
          [(const_int 0)]
          UNSPECV_STMXCSR))]
  "TARGET_SSE"
  "%vstmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "mxcsr")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "store")])
8268
;; Expander for sfence.  Operand 0 is not supplied by the caller: the
;; preparation code builds a volatile BLKmode MEM whose address is a
;; Pmode SCRATCH, and the template sets that MEM to an UNSPEC_SFENCE of
;; itself.
(define_expand "sse_sfence"
  [(set (match_dup 0)
        (unspec:BLK
          [(match_dup 0)]
          UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})
8277
;; Matcher for the sfence RTL: a BLKmode operand (no predicate) set to an
;; UNSPEC_SFENCE of itself.  The instruction has no address bytes, hence
;; length_address 0.
(define_insn "*sse_sfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK
          [(match_dup 0)]
          UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
  "sfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])
8287
;; clflush: volatile unspec (UNSPECV_CLFLUSH) of an address operand,
;; printed through the %a operand modifier.
(define_insn "sse2_clflush"
  [(unspec_volatile
     [(match_operand 0 "address_operand" "p")]
     UNSPECV_CLFLUSH)]
  "TARGET_SSE2"
  "clflush\t%a0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])
8296
;; Expander for mfence.  The preparation code builds operand 0 as a
;; volatile BLKmode MEM addressed by a Pmode SCRATCH; the template sets
;; that MEM to an UNSPEC_MFENCE of itself.
(define_expand "sse2_mfence"
  [(set (match_dup 0)
        (unspec:BLK
          [(match_dup 0)]
          UNSPEC_MFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})
8305
;; Matcher for the mfence RTL: a BLKmode operand set to an UNSPEC_MFENCE
;; of itself.  Note the condition also accepts TARGET_64BIT on its own,
;; not only TARGET_SSE2.
(define_insn "*sse2_mfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK
          [(match_dup 0)]
          UNSPEC_MFENCE))]
  "TARGET_64BIT || TARGET_SSE2"
  "mfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])
8315
;; Expander for lfence.  The preparation code builds operand 0 as a
;; volatile BLKmode MEM addressed by a Pmode SCRATCH; the template sets
;; that MEM to an UNSPEC_LFENCE of itself.
(define_expand "sse2_lfence"
  [(set (match_dup 0)
        (unspec:BLK
          [(match_dup 0)]
          UNSPEC_LFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})
8324
;; Matcher for the lfence RTL: a BLKmode operand set to an UNSPEC_LFENCE
;; of itself.  Unlike the sfence/mfence patterns, atom_sse_attr is
;; "lfence" rather than "fence".
(define_insn "*sse2_lfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK
          [(match_dup 0)]
          UNSPEC_LFENCE))]
  "TARGET_SSE2"
  "lfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "lfence")
   (set_attr "memory" "unknown")])
8334
;; mwait: volatile unspec (UNSPECV_MWAIT) of two SImode registers pinned
;; by constraints "a" and "c".  Both are implicit, so the template is the
;; bare mnemonic.
(define_insn "sse3_mwait"
  [(unspec_volatile
     [(match_operand:SI 0 "register_operand" "a")
      (match_operand:SI 1 "register_operand" "c")]
     UNSPECV_MWAIT)]
  "TARGET_SSE3"
  ;; The 64-bit form is "mwait %rax,%rcx", but only the low 32 bits are
  ;; used.  32-bit register operands are implicitly zero-extended to
  ;; 64 bits, so setting up the 32-bit registers is sufficient.
  "mwait"
  [(set_attr "length" "3")])
8345
;; 32-bit monitor: volatile unspec (UNSPECV_MONITOR) of three SImode
;; registers pinned by constraints "a", "c" and "d".  This variant prints
;; its operands explicitly.
(define_insn "sse3_monitor"
  [(unspec_volatile
     [(match_operand:SI 0 "register_operand" "a")
      (match_operand:SI 1 "register_operand" "c")
      (match_operand:SI 2 "register_operand" "d")]
     UNSPECV_MONITOR)]
  "TARGET_SSE3 && !TARGET_64BIT"
  "monitor\t%0, %1, %2"
  [(set_attr "length" "3")])
8354
;; 64-bit monitor: operand 0 (constraint "a") is DImode, while operands 1
;; and 2 (constraints "c" and "d") stay SImode.  All operands are
;; implicit in the emitted mnemonic.
(define_insn "sse3_monitor64"
  [(unspec_volatile
     [(match_operand:DI 0 "register_operand" "a")
      (match_operand:SI 1 "register_operand" "c")
      (match_operand:SI 2 "register_operand" "d")]
     UNSPECV_MONITOR)]
  "TARGET_SSE3 && TARGET_64BIT"
  ;; The 64-bit form is "monitor %rax,%rcx,%rdx", but only the low
  ;; 32 bits of RCX and RDX are used.  32-bit register operands are
  ;; implicitly zero-extended to 64 bits, so setting up the 32-bit
  ;; registers is sufficient.
  "monitor"
  [(set_attr "length" "3")])
8366
8367 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8368 ;;
8369 ;; SSSE3 instructions
8370 ;;
8371 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8372
;; AVX horizontal add of adjacent 16-bit pairs (three-operand form).
;; The V8HI result is the concatenation of four pair sums from operand 1
;; followed by four pair sums from operand 2.
(define_insn "*avx_phaddwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; First half: pair sums from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Second half: pair sums from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX"
  "vphaddw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8416
;; SSSE3 horizontal add of adjacent 16-bit pairs (two-operand form;
;; operand 1 is tied to the destination).  The V8HI result is four pair
;; sums from operand 1 followed by four pair sums from operand 2.
(define_insn "ssse3_phaddwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; First half: pair sums from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Second half: pair sums from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phaddw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8461
;; 64-bit (V4HI, constraint "y") horizontal add of adjacent 16-bit pairs.
;; Two pair sums come from operand 1 (tied to the destination), then two
;; from operand 2.
(define_insn "ssse3_phaddwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; First half: pair sums from operand 1.
          (vec_concat:V2HI
            (plus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (plus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Second half: pair sums from operand 2.
          (vec_concat:V2HI
            (plus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (plus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8490
;; AVX horizontal add of adjacent 32-bit pairs (three-operand form).
;; The V4SI result is two pair sums from operand 1 followed by two pair
;; sums from operand 2.
(define_insn "*avx_phadddv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          ;; First half: pair sums from operand 1.
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "x")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Second half: pair sums from operand 2.
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_AVX"
  "vphaddd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8518
;; SSSE3 horizontal add of adjacent 32-bit pairs (two-operand form;
;; operand 1 is tied to the destination).  The V4SI result is two pair
;; sums from operand 1 followed by two pair sums from operand 2.
(define_insn "ssse3_phadddv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          ;; First half: pair sums from operand 1.
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Second half: pair sums from operand 2.
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8547
;; 64-bit (V2SI, constraint "y") horizontal add: element 0 is the sum of
;; operand 1's two elements, element 1 the sum of operand 2's two
;; elements.  Operand 1 is tied to the destination.
(define_insn "ssse3_phadddv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          ;; Sum of the pair in operand 1.
          (plus:SI
            (vec_select:SI
              (match_operand:V2SI 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          ;; Sum of the pair in operand 2.
          (plus:SI
            (vec_select:SI
              (match_operand:V2SI 2 "nonimmediate_operand" "ym")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phaddd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8568
;; AVX horizontal signed-saturating add (ss_plus) of adjacent 16-bit
;; pairs, three-operand form.  The V8HI result is four pair sums from
;; operand 1 followed by four pair sums from operand 2.
(define_insn "*avx_phaddswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; First half: saturating pair sums from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Second half: saturating pair sums from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX"
  "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8612
;; SSSE3 horizontal signed-saturating add (ss_plus) of adjacent 16-bit
;; pairs, two-operand form (operand 1 tied to the destination).  The V8HI
;; result is four pair sums from operand 1 followed by four from
;; operand 2.
(define_insn "ssse3_phaddswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; First half: saturating pair sums from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Second half: saturating pair sums from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phaddsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8657
;; 64-bit (V4HI, constraint "y") horizontal signed-saturating add of
;; adjacent 16-bit pairs.  Two pair sums come from operand 1 (tied to the
;; destination), then two from operand 2.
(define_insn "ssse3_phaddswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; First half: saturating pair sums from operand 1.
          (vec_concat:V2HI
            (ss_plus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ss_plus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Second half: saturating pair sums from operand 2.
          (vec_concat:V2HI
            (ss_plus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ss_plus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8686
;; AVX horizontal subtract of adjacent 16-bit pairs (three-operand form).
;; Each result element is (even element - following odd element); four
;; differences come from operand 1, then four from operand 2.
(define_insn "*avx_phsubwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; First half: pair differences from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Second half: pair differences from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX"
  "vphsubw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8730
;; SSSE3 horizontal subtract of adjacent 16-bit pairs (two-operand form;
;; operand 1 tied to the destination).  Each result element is (even
;; element - following odd element); four differences come from
;; operand 1, then four from operand 2.
(define_insn "ssse3_phsubwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; First half: pair differences from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Second half: pair differences from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phsubw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8775
;; 64-bit (V4HI, constraint "y") horizontal subtract of adjacent 16-bit
;; pairs.  Two differences come from operand 1 (tied to the destination),
;; then two from operand 2.
(define_insn "ssse3_phsubwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; First half: pair differences from operand 1.
          (vec_concat:V2HI
            (minus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (minus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Second half: pair differences from operand 2.
          (vec_concat:V2HI
            (minus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (minus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8804
;; AVX horizontal subtract of adjacent 32-bit pairs (three-operand form).
;; Each result element is (even element - following odd element); two
;; differences come from operand 1, then two from operand 2.
(define_insn "*avx_phsubdv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          ;; First half: pair differences from operand 1.
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "x")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Second half: pair differences from operand 2.
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_AVX"
  "vphsubd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8832
;; SSSE3 horizontal subtract of adjacent 32-bit pairs (two-operand form;
;; operand 1 tied to the destination).  Two differences come from
;; operand 1, then two from operand 2.
(define_insn "ssse3_phsubdv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          ;; First half: pair differences from operand 1.
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Second half: pair differences from operand 2.
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8861
;; 64-bit (V2SI, constraint "y") horizontal subtract: element 0 is
;; op1[0] - op1[1], element 1 is op2[0] - op2[1].  Operand 1 is tied to
;; the destination.
(define_insn "ssse3_phsubdv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          ;; Difference of the pair in operand 1.
          (minus:SI
            (vec_select:SI
              (match_operand:V2SI 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          ;; Difference of the pair in operand 2.
          (minus:SI
            (vec_select:SI
              (match_operand:V2SI 2 "nonimmediate_operand" "ym")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phsubd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8882
;; AVX horizontal signed-saturating subtract (ss_minus) of adjacent
;; 16-bit pairs, three-operand form.  Four differences come from
;; operand 1, then four from operand 2.
(define_insn "*avx_phsubswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; First half: saturating pair differences from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Second half: saturating pair differences from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX"
  "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8926
;; SSSE3 horizontal signed-saturating subtract (ss_minus) of adjacent
;; 16-bit pairs, two-operand form (operand 1 tied to the destination).
;; Four differences come from operand 1, then four from operand 2.
(define_insn "ssse3_phsubswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; First half: saturating pair differences from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Second half: saturating pair differences from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phsubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8971
;; 64-bit (V4HI, constraint "y") horizontal signed-saturating subtract of
;; adjacent 16-bit pairs.  Two differences come from operand 1 (tied to
;; the destination), then two from operand 2.
(define_insn "ssse3_phsubswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; First half: saturating pair differences from operand 1.
          (vec_concat:V2HI
            (ss_minus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ss_minus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Second half: saturating pair differences from operand 2.
          (vec_concat:V2HI
            (ss_minus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ss_minus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
9000
;; pmaddubsw on 128-bit operands, VEX-encoded three-operand form.
;; The even-indexed bytes of operand 1 (zero-extended) are multiplied by
;; the even-indexed bytes of operand 2 (sign-extended), likewise for the
;; odd-indexed bytes, and the two V8HI products are added with signed
;; saturation (ss_plus).
;; Every vec_select picks eight bytes, so its mode is V8QI: that agrees
;; with the eight-element parallel and with the V8HI extension around it.
;; The RTL shape is kept identical to ssse3_pmaddubsw128 below so both
;; patterns describe the same expression.
9001 (define_insn "*avx_pmaddubsw128"
9002 [(set (match_operand:V8HI 0 "register_operand" "=x")
9003 (ss_plus:V8HI
9004 (mult:V8HI
9005 (zero_extend:V8HI
9006 (vec_select:V8QI
9007 (match_operand:V16QI 1 "register_operand" "x")
9008 (parallel [(const_int 0)
9009 (const_int 2)
9010 (const_int 4)
9011 (const_int 6)
9012 (const_int 8)
9013 (const_int 10)
9014 (const_int 12)
9015 (const_int 14)])))
9016 (sign_extend:V8HI
9017 (vec_select:V8QI
9018 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9019 (parallel [(const_int 0)
9020 (const_int 2)
9021 (const_int 4)
9022 (const_int 6)
9023 (const_int 8)
9024 (const_int 10)
9025 (const_int 12)
9026 (const_int 14)]))))
9027 (mult:V8HI
9028 (zero_extend:V8HI
9029 (vec_select:V8QI (match_dup 1)
9030 (parallel [(const_int 1)
9031 (const_int 3)
9032 (const_int 5)
9033 (const_int 7)
9034 (const_int 9)
9035 (const_int 11)
9036 (const_int 13)
9037 (const_int 15)])))
9038 (sign_extend:V8HI
9039 (vec_select:V8QI (match_dup 2)
9040 (parallel [(const_int 1)
9041 (const_int 3)
9042 (const_int 5)
9043 (const_int 7)
9044 (const_int 9)
9045 (const_int 11)
9046 (const_int 13)
9047 (const_int 15)]))))))]
9048 "TARGET_AVX"
9049 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
9050 [(set_attr "type" "sseiadd")
9051 (set_attr "prefix_extra" "1")
9052 (set_attr "prefix" "vex")
9053 (set_attr "mode" "TI")])
9054
;; pmaddubsw on 128-bit operands, two-operand SSSE3 form: same RTL as
;; *avx_pmaddubsw128 above, but operand 1 is tied to the destination ("0").
;; All vec_selects are V8QI (eight selected bytes each).
9055 (define_insn "ssse3_pmaddubsw128"
9056 [(set (match_operand:V8HI 0 "register_operand" "=x")
9057 (ss_plus:V8HI
9058 (mult:V8HI
9059 (zero_extend:V8HI
9060 (vec_select:V8QI
9061 (match_operand:V16QI 1 "register_operand" "0")
9062 (parallel [(const_int 0)
9063 (const_int 2)
9064 (const_int 4)
9065 (const_int 6)
9066 (const_int 8)
9067 (const_int 10)
9068 (const_int 12)
9069 (const_int 14)])))
9070 (sign_extend:V8HI
9071 (vec_select:V8QI
9072 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9073 (parallel [(const_int 0)
9074 (const_int 2)
9075 (const_int 4)
9076 (const_int 6)
9077 (const_int 8)
9078 (const_int 10)
9079 (const_int 12)
9080 (const_int 14)]))))
9081 (mult:V8HI
9082 (zero_extend:V8HI
9083 (vec_select:V8QI (match_dup 1)
9084 (parallel [(const_int 1)
9085 (const_int 3)
9086 (const_int 5)
9087 (const_int 7)
9088 (const_int 9)
9089 (const_int 11)
9090 (const_int 13)
9091 (const_int 15)])))
9092 (sign_extend:V8HI
9093 (vec_select:V8QI (match_dup 2)
9094 (parallel [(const_int 1)
9095 (const_int 3)
9096 (const_int 5)
9097 (const_int 7)
9098 (const_int 9)
9099 (const_int 11)
9100 (const_int 13)
9101 (const_int 15)]))))))]
9102 "TARGET_SSSE3"
9103 "pmaddubsw\t{%2, %0|%0, %2}"
9104 [(set_attr "type" "sseiadd")
9105 (set_attr "atom_unit" "simul")
9106 (set_attr "prefix_data16" "1")
9107 (set_attr "prefix_extra" "1")
9108 (set_attr "mode" "TI")])
9109
;; pmaddubsw on V8QI operands (constraint "y", insn mode DI).
;; Even-indexed bytes of operand 1 (zero-extended) times even-indexed
;; bytes of operand 2 (sign-extended), plus the same for the odd-indexed
;; bytes, summed with signed saturation (ss_plus) into V4HI.
;; Every vec_select picks four bytes, so its mode is V4QI: that agrees
;; with the four-element parallel and with the V4HI extension around it.
9110 (define_insn "ssse3_pmaddubsw"
9111 [(set (match_operand:V4HI 0 "register_operand" "=y")
9112 (ss_plus:V4HI
9113 (mult:V4HI
9114 (zero_extend:V4HI
9115 (vec_select:V4QI
9116 (match_operand:V8QI 1 "register_operand" "0")
9117 (parallel [(const_int 0)
9118 (const_int 2)
9119 (const_int 4)
9120 (const_int 6)])))
9121 (sign_extend:V4HI
9122 (vec_select:V4QI
9123 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
9124 (parallel [(const_int 0)
9125 (const_int 2)
9126 (const_int 4)
9127 (const_int 6)]))))
9128 (mult:V4HI
9129 (zero_extend:V4HI
9130 (vec_select:V4QI (match_dup 1)
9131 (parallel [(const_int 1)
9132 (const_int 3)
9133 (const_int 5)
9134 (const_int 7)])))
9135 (sign_extend:V4HI
9136 (vec_select:V4QI (match_dup 2)
9137 (parallel [(const_int 1)
9138 (const_int 3)
9139 (const_int 5)
9140 (const_int 7)]))))))]
9141 "TARGET_SSSE3"
9142 "pmaddubsw\t{%2, %0|%0, %2}"
9143 [(set_attr "type" "sseiadd")
9144 (set_attr "atom_unit" "simul")
9145 (set_attr "prefix_extra" "1")
9146 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9147 (set_attr "mode" "DI")])
9148
;; Expander for the V8HI pmulhrsw patterns.  The RTL sign-extends both
;; V8HI operands to V8SI, multiplies them, shifts right by 14, adds a
;; vector of ones, shifts right by one more bit and truncates to V8HI.
;; The preparation statement runs the operands through
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, ...).
9149 (define_expand "ssse3_pmulhrswv8hi3"
9150 [(set (match_operand:V8HI 0 "register_operand" "")
9151 (truncate:V8HI
9152 (lshiftrt:V8SI
9153 (plus:V8SI
9154 (lshiftrt:V8SI
9155 (mult:V8SI
9156 (sign_extend:V8SI
9157 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9158 (sign_extend:V8SI
9159 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9160 (const_int 14))
9161 (const_vector:V8HI [(const_int 1) (const_int 1)
9162 (const_int 1) (const_int 1)
9163 (const_int 1) (const_int 1)
9164 (const_int 1) (const_int 1)]))
9165 (const_int 1))))]
9166 "TARGET_SSSE3"
9167 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
9168
;; VEX-encoded vpmulhrsw on V8HI: sign-extended multiply, shift right by
;; 14, add one, shift right by one, truncate.  Operand 1 is marked
;; commutative ("%") and is not tied to the destination.  The pattern
;; only matches when ix86_binary_operator_ok accepts the operands.
9169 (define_insn "*avx_pmulhrswv8hi3"
9170 [(set (match_operand:V8HI 0 "register_operand" "=x")
9171 (truncate:V8HI
9172 (lshiftrt:V8SI
9173 (plus:V8SI
9174 (lshiftrt:V8SI
9175 (mult:V8SI
9176 (sign_extend:V8SI
9177 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
9178 (sign_extend:V8SI
9179 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9180 (const_int 14))
9181 (const_vector:V8HI [(const_int 1) (const_int 1)
9182 (const_int 1) (const_int 1)
9183 (const_int 1) (const_int 1)
9184 (const_int 1) (const_int 1)]))
9185 (const_int 1))))]
9186 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9187 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9188 [(set_attr "type" "sseimul")
9189 (set_attr "prefix_extra" "1")
9190 (set_attr "prefix" "vex")
9191 (set_attr "mode" "TI")])
9192
;; Two-operand SSSE3 pmulhrsw on V8HI: same RTL as the expander of the
;; same name, with commutative operand 1 tied to the destination ("%0").
;; The pattern only matches when ix86_binary_operator_ok accepts the
;; operands.
9193 (define_insn "*ssse3_pmulhrswv8hi3"
9194 [(set (match_operand:V8HI 0 "register_operand" "=x")
9195 (truncate:V8HI
9196 (lshiftrt:V8SI
9197 (plus:V8SI
9198 (lshiftrt:V8SI
9199 (mult:V8SI
9200 (sign_extend:V8SI
9201 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
9202 (sign_extend:V8SI
9203 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9204 (const_int 14))
9205 (const_vector:V8HI [(const_int 1) (const_int 1)
9206 (const_int 1) (const_int 1)
9207 (const_int 1) (const_int 1)
9208 (const_int 1) (const_int 1)]))
9209 (const_int 1))))]
9210 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9211 "pmulhrsw\t{%2, %0|%0, %2}"
9212 [(set_attr "type" "sseimul")
9213 (set_attr "prefix_data16" "1")
9214 (set_attr "prefix_extra" "1")
9215 (set_attr "mode" "TI")])
9216
;; Expander for the V4HI pmulhrsw pattern.  The RTL sign-extends both
;; V4HI operands to V4SI, multiplies them, shifts right by 14, adds a
;; vector of ones, shifts right by one more bit and truncates to V4HI.
;; The preparation statement runs the operands through
;; ix86_fixup_binary_operands_no_copy (MULT, V4HImode, ...).
9217 (define_expand "ssse3_pmulhrswv4hi3"
9218 [(set (match_operand:V4HI 0 "register_operand" "")
9219 (truncate:V4HI
9220 (lshiftrt:V4SI
9221 (plus:V4SI
9222 (lshiftrt:V4SI
9223 (mult:V4SI
9224 (sign_extend:V4SI
9225 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9226 (sign_extend:V4SI
9227 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9228 (const_int 14))
9229 (const_vector:V4HI [(const_int 1) (const_int 1)
9230 (const_int 1) (const_int 1)]))
9231 (const_int 1))))]
9232 "TARGET_SSSE3"
9233 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
9234
;; pmulhrsw on V4HI operands (constraint "y", insn mode DI): same RTL as
;; the expander of the same name, with commutative operand 1 tied to the
;; destination ("%0").  Only matches when ix86_binary_operator_ok accepts
;; the operands.
9235 (define_insn "*ssse3_pmulhrswv4hi3"
9236 [(set (match_operand:V4HI 0 "register_operand" "=y")
9237 (truncate:V4HI
9238 (lshiftrt:V4SI
9239 (plus:V4SI
9240 (lshiftrt:V4SI
9241 (mult:V4SI
9242 (sign_extend:V4SI
9243 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9244 (sign_extend:V4SI
9245 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9246 (const_int 14))
9247 (const_vector:V4HI [(const_int 1) (const_int 1)
9248 (const_int 1) (const_int 1)]))
9249 (const_int 1))))]
9250 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9251 "pmulhrsw\t{%2, %0|%0, %2}"
9252 [(set_attr "type" "sseimul")
9253 (set_attr "prefix_extra" "1")
9254 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9255 (set_attr "mode" "DI")])
9256
;; VEX-encoded vpshufb on V16QI, modelled as an opaque UNSPEC_PSHUFB of
;; operands 1 and 2; operand 1 is not tied to the destination.
9257 (define_insn "*avx_pshufbv16qi3"
9258 [(set (match_operand:V16QI 0 "register_operand" "=x")
9259 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9260 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9261 UNSPEC_PSHUFB))]
9262 "TARGET_AVX"
9263 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
9264 [(set_attr "type" "sselog1")
9265 (set_attr "prefix_extra" "1")
9266 (set_attr "prefix" "vex")
9267 (set_attr "mode" "TI")])
9268
;; Two-operand SSSE3 pshufb on V16QI (UNSPEC_PSHUFB); operand 1 is tied
;; to the destination ("0"), operand 2 may be a register or memory.
9269 (define_insn "ssse3_pshufbv16qi3"
9270 [(set (match_operand:V16QI 0 "register_operand" "=x")
9271 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9272 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9273 UNSPEC_PSHUFB))]
9274 "TARGET_SSSE3"
9275 "pshufb\t{%2, %0|%0, %2}";
9276 [(set_attr "type" "sselog1")
9277 (set_attr "prefix_data16" "1")
9278 (set_attr "prefix_extra" "1")
9279 (set_attr "mode" "TI")])
9280
;; pshufb on V8QI operands (constraint "y", insn mode DI), modelled as
;; UNSPEC_PSHUFB; operand 1 is tied to the destination ("0").
9281 (define_insn "ssse3_pshufbv8qi3"
9282 [(set (match_operand:V8QI 0 "register_operand" "=y")
9283 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9284 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9285 UNSPEC_PSHUFB))]
9286 "TARGET_SSSE3"
9287 "pshufb\t{%2, %0|%0, %2}";
9288 [(set_attr "type" "sselog1")
9289 (set_attr "prefix_extra" "1")
9290 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9291 (set_attr "mode" "DI")])
9292
;; VEX-encoded vpsign{b,w,d} for each mode of the SSEMODE124 iterator
;; (V16QI, V8HI, V4SI), modelled as UNSPEC_PSIGN of operands 1 and 2.
;; The mnemonic suffix comes from the <ssevecsize> mode attribute.
9293 (define_insn "*avx_psign<mode>3"
9294 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9295 (unspec:SSEMODE124
9296 [(match_operand:SSEMODE124 1 "register_operand" "x")
9297 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9298 UNSPEC_PSIGN))]
9299 "TARGET_AVX"
9300 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
9301 [(set_attr "type" "sselog1")
9302 (set_attr "prefix_extra" "1")
9303 (set_attr "prefix" "vex")
9304 (set_attr "mode" "TI")])
9305
;; Two-operand SSSE3 psign for each SSEMODE124 mode (V16QI, V8HI, V4SI),
;; modelled as UNSPEC_PSIGN; operand 1 is tied to the destination ("0").
9306 (define_insn "ssse3_psign<mode>3"
9307 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9308 (unspec:SSEMODE124
9309 [(match_operand:SSEMODE124 1 "register_operand" "0")
9310 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9311 UNSPEC_PSIGN))]
9312 "TARGET_SSSE3"
9313 "psign<ssevecsize>\t{%2, %0|%0, %2}";
9314 [(set_attr "type" "sselog1")
9315 (set_attr "prefix_data16" "1")
9316 (set_attr "prefix_extra" "1")
9317 (set_attr "mode" "TI")])
9318
;; psign for each mode of the MMXMODEI iterator (constraint "y", insn
;; mode DI), modelled as UNSPEC_PSIGN.  The pattern name template is the
;; same as the SSEMODE124 variant; the iterated <mode> keeps the
;; generated names distinct.  The suffix comes from <mmxvecsize>.
9319 (define_insn "ssse3_psign<mode>3"
9320 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9321 (unspec:MMXMODEI
9322 [(match_operand:MMXMODEI 1 "register_operand" "0")
9323 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9324 UNSPEC_PSIGN))]
9325 "TARGET_SSSE3"
9326 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9327 [(set_attr "type" "sselog1")
9328 (set_attr "prefix_extra" "1")
9329 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9330 (set_attr "mode" "DI")])
9331
;; VEX-encoded vpalignr on TImode values (UNSPEC_PALIGNR).
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the instruction immediate.
;; NOTE(review): presumably the RTL operand is a bit count and the
;; instruction takes a byte count -- confirm against the predicate.
9332 (define_insn "*avx_palignrti"
9333 [(set (match_operand:TI 0 "register_operand" "=x")
9334 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9335 (match_operand:TI 2 "nonimmediate_operand" "xm")
9336 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9337 UNSPEC_PALIGNR))]
9338 "TARGET_AVX"
9339 {
9340 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9341 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9342 }
9343 [(set_attr "type" "sseishft")
9344 (set_attr "prefix_extra" "1")
9345 (set_attr "length_immediate" "1")
9346 (set_attr "prefix" "vex")
9347 (set_attr "mode" "TI")])
9348
;; Two-operand SSSE3 palignr on TImode values (UNSPEC_PALIGNR); operand 1
;; is tied to the destination ("0").
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the instruction immediate.
;; NOTE(review): presumably a bit count converted to bytes -- confirm.
9349 (define_insn "ssse3_palignrti"
9350 [(set (match_operand:TI 0 "register_operand" "=x")
9351 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9352 (match_operand:TI 2 "nonimmediate_operand" "xm")
9353 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9354 UNSPEC_PALIGNR))]
9355 "TARGET_SSSE3"
9356 {
9357 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9358 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9359 }
9360 [(set_attr "type" "sseishft")
9361 (set_attr "atom_unit" "sishuf")
9362 (set_attr "prefix_data16" "1")
9363 (set_attr "prefix_extra" "1")
9364 (set_attr "length_immediate" "1")
9365 (set_attr "mode" "TI")])
9366
;; palignr on DImode values (constraint "y", UNSPEC_PALIGNR); operand 1
;; is tied to the destination ("0").
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the instruction immediate.
;; NOTE(review): presumably a bit count converted to bytes -- confirm.
9367 (define_insn "ssse3_palignrdi"
9368 [(set (match_operand:DI 0 "register_operand" "=y")
9369 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9370 (match_operand:DI 2 "nonimmediate_operand" "ym")
9371 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9372 UNSPEC_PALIGNR))]
9373 "TARGET_SSSE3"
9374 {
9375 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9376 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9377 }
9378 [(set_attr "type" "sseishft")
9379 (set_attr "atom_unit" "sishuf")
9380 (set_attr "prefix_extra" "1")
9381 (set_attr "length_immediate" "1")
9382 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9383 (set_attr "mode" "DI")])
9384
;; Element-wise absolute value (abs) for each SSEMODE124 mode (V16QI,
;; V8HI, V4SI), emitted as pabs{b,w,d}.  A single pattern carries the
;; "maybe_vex" prefix attribute and a %v in the template.
;; NOTE(review): %v presumably prints the "v" mnemonic prefix when AVX
;; is enabled -- confirm in the operand-printing code.
9385 (define_insn "abs<mode>2"
9386 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9387 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9388 "TARGET_SSSE3"
9389 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9390 [(set_attr "type" "sselog1")
9391 (set_attr "prefix_data16" "1")
9392 (set_attr "prefix_extra" "1")
9393 (set_attr "prefix" "maybe_vex")
9394 (set_attr "mode" "TI")])
9395
;; Element-wise absolute value (abs) for each MMXMODEI mode (constraint
;; "y", insn mode DI), emitted as pabs<mmxvecsize>.  The source may be a
;; register or memory; prefix_rep is explicitly set to 0.
9396 (define_insn "abs<mode>2"
9397 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9398 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9399 "TARGET_SSSE3"
9400 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9401 [(set_attr "type" "sselog1")
9402 (set_attr "prefix_rep" "0")
9403 (set_attr "prefix_extra" "1")
9404 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9405 (set_attr "mode" "DI")])
9406
9407 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9408 ;;
9409 ;; AMD SSE4A instructions
9410 ;;
9411 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9412
;; SSE4A store of a MODEF-mode register (operand 1) to memory
;; (operand 0), wrapped in UNSPEC_MOVNT; emitted as
;; movnts<ssemodefsuffix>.
9413 (define_insn "sse4a_movnt<mode>"
9414 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9415 (unspec:MODEF
9416 [(match_operand:MODEF 1 "register_operand" "x")]
9417 UNSPEC_MOVNT))]
9418 "TARGET_SSE4A"
9419 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9420 [(set_attr "type" "ssemov")
9421 (set_attr "mode" "<MODE>")])
9422
;; SSE4A store of element 0 of an SSEMODEF2P vector register to a
;; scalar-mode memory location, wrapped in UNSPEC_MOVNT; emitted as
;; movnts<ssemodesuffixf2c>.
9423 (define_insn "sse4a_vmmovnt<mode>"
9424 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9425 (unspec:<ssescalarmode>
9426 [(vec_select:<ssescalarmode>
9427 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9428 (parallel [(const_int 0)]))]
9429 UNSPEC_MOVNT))]
9430 "TARGET_SSE4A"
9431 "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
9432 [(set_attr "type" "ssemov")
9433 (set_attr "mode" "<ssescalarmode>")])
9434
;; SSE4A extrq, immediate form (UNSPEC_EXTRQI).  Operand 1 is tied to the
;; destination; operands 2 and 3 are mode-less integer constants printed
;; as the two immediates (length_immediate is 2).
9435 (define_insn "sse4a_extrqi"
9436 [(set (match_operand:V2DI 0 "register_operand" "=x")
9437 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9438 (match_operand 2 "const_int_operand" "")
9439 (match_operand 3 "const_int_operand" "")]
9440 UNSPEC_EXTRQI))]
9441 "TARGET_SSE4A"
9442 "extrq\t{%3, %2, %0|%0, %2, %3}"
9443 [(set_attr "type" "sse")
9444 (set_attr "prefix_data16" "1")
9445 (set_attr "length_immediate" "2")
9446 (set_attr "mode" "TI")])
9447
;; SSE4A extrq, register form (UNSPEC_EXTRQ).  Operand 1 is tied to the
;; destination; operand 2 is a V16QI register.
9448 (define_insn "sse4a_extrq"
9449 [(set (match_operand:V2DI 0 "register_operand" "=x")
9450 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9451 (match_operand:V16QI 2 "register_operand" "x")]
9452 UNSPEC_EXTRQ))]
9453 "TARGET_SSE4A"
9454 "extrq\t{%2, %0|%0, %2}"
9455 [(set_attr "type" "sse")
9456 (set_attr "prefix_data16" "1")
9457 (set_attr "mode" "TI")])
9458
;; SSE4A insertq, immediate form (UNSPEC_INSERTQI).  Operand 1 is tied to
;; the destination, operand 2 is a V2DI register, and operands 3 and 4
;; are mode-less integer constants printed as the two immediates.
9459 (define_insn "sse4a_insertqi"
9460 [(set (match_operand:V2DI 0 "register_operand" "=x")
9461 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9462 (match_operand:V2DI 2 "register_operand" "x")
9463 (match_operand 3 "const_int_operand" "")
9464 (match_operand 4 "const_int_operand" "")]
9465 UNSPEC_INSERTQI))]
9466 "TARGET_SSE4A"
9467 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9468 [(set_attr "type" "sseins")
9469 (set_attr "prefix_data16" "0")
9470 (set_attr "prefix_rep" "1")
9471 (set_attr "length_immediate" "2")
9472 (set_attr "mode" "TI")])
9473
;; SSE4A insertq, register form (UNSPEC_INSERTQ).  Operand 1 is tied to
;; the destination; operand 2 is a V2DI register.
9474 (define_insn "sse4a_insertq"
9475 [(set (match_operand:V2DI 0 "register_operand" "=x")
9476 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9477 (match_operand:V2DI 2 "register_operand" "x")]
9478 UNSPEC_INSERTQ))]
9479 "TARGET_SSE4A"
9480 "insertq\t{%2, %0|%0, %2}"
9481 [(set_attr "type" "sseins")
9482 (set_attr "prefix_data16" "0")
9483 (set_attr "prefix_rep" "1")
9484 (set_attr "mode" "TI")])
9485
9486 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9487 ;;
9488 ;; Intel SSE4.1 instructions
9489 ;;
9490 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9491
;; VEX-encoded vblendps/vblendpd for each AVXMODEF2P mode, expressed as a
;; vec_merge: operand 2 is the first arm and operand 1 the second, with
;; constant operand 3 (range limited by <blendbits>) as the merge mask.
9492 (define_insn "avx_blendp<avxmodesuffixf2c><avxmodesuffix>"
9493 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9494 (vec_merge:AVXMODEF2P
9495 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9496 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9497 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9498 "TARGET_AVX"
9499 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9500 [(set_attr "type" "ssemov")
9501 (set_attr "prefix_extra" "1")
9502 (set_attr "length_immediate" "1")
9503 (set_attr "prefix" "vex")
9504 (set_attr "mode" "<avxvecmode>")])
9505
;; VEX-encoded vblendvps/vblendvpd for each AVXMODEF2P mode, modelled as
;; UNSPEC_BLENDV of three vector operands; operand 3 is an ordinary
;; vector register printed as the fourth instruction operand.
9506 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
9507 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9508 (unspec:AVXMODEF2P
9509 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9510 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9511 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9512 UNSPEC_BLENDV))]
9513 "TARGET_AVX"
9514 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9515 [(set_attr "type" "ssemov")
9516 (set_attr "prefix_extra" "1")
9517 (set_attr "length_immediate" "1")
9518 (set_attr "prefix" "vex")
9519 (set_attr "mode" "<avxvecmode>")])
9520
;; SSE4.1 blendps/blendpd for each SSEMODEF2P mode, expressed as a
;; vec_merge of operand 2 (first arm) and operand 1 (second arm, tied to
;; the destination) under constant mask operand 3.
9521 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
9522 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9523 (vec_merge:SSEMODEF2P
9524 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9525 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9526 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9527 "TARGET_SSE4_1"
9528 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9529 [(set_attr "type" "ssemov")
9530 (set_attr "prefix_data16" "1")
9531 (set_attr "prefix_extra" "1")
9532 (set_attr "length_immediate" "1")
9533 (set_attr "mode" "<MODE>")])
9534
;; SSE4.1 blendvps/blendvpd for each SSEMODEF2P mode (UNSPEC_BLENDV).
;; Operands 0-2 use the *_not_xmm0 predicates and operand 3 uses the "Yz"
;; constraint.
;; NOTE(review): presumably this pins the mask operand to xmm0 and keeps
;; the other operands out of it -- confirm against the constraint
;; definitions.
9535 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
9536 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9537 (unspec:SSEMODEF2P
9538 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9539 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9540 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9541 UNSPEC_BLENDV))]
9542 "TARGET_SSE4_1"
9543 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9544 [(set_attr "type" "ssemov")
9545 (set_attr "prefix_data16" "1")
9546 (set_attr "prefix_extra" "1")
9547 (set_attr "mode" "<MODE>")])
9548
;; VEX-encoded vdpps/vdppd for each AVXMODEF2P mode, modelled as
;; UNSPEC_DP of commutative operand 1 ("%"), operand 2 and an immediate
;; in the range accepted by const_0_to_255_operand.
9549 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
9550 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9551 (unspec:AVXMODEF2P
9552 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9553 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9554 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9555 UNSPEC_DP))]
9556 "TARGET_AVX"
9557 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9558 [(set_attr "type" "ssemul")
9559 (set_attr "prefix" "vex")
9560 (set_attr "prefix_extra" "1")
9561 (set_attr "length_immediate" "1")
9562 (set_attr "mode" "<avxvecmode>")])
9563
;; SSE4.1 dpps/dppd for each SSEMODEF2P mode (UNSPEC_DP); commutative
;; operand 1 is tied to the destination ("%0"), operand 3 is an
;; immediate accepted by const_0_to_255_operand.
9564 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
9565 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9566 (unspec:SSEMODEF2P
9567 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9568 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9569 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9570 UNSPEC_DP))]
9571 "TARGET_SSE4_1"
9572 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9573 [(set_attr "type" "ssemul")
9574 (set_attr "prefix_data16" "1")
9575 (set_attr "prefix_extra" "1")
9576 (set_attr "length_immediate" "1")
9577 (set_attr "mode" "<MODE>")])
9578
;; SSE4.1 movntdqa: load of a V2DI memory operand into a register,
;; wrapped in UNSPEC_MOVNTDQA.  One pattern with the "maybe_vex" prefix
;; attribute covers both encodings.
9579 (define_insn "sse4_1_movntdqa"
9580 [(set (match_operand:V2DI 0 "register_operand" "=x")
9581 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9582 UNSPEC_MOVNTDQA))]
9583 "TARGET_SSE4_1"
9584 "%vmovntdqa\t{%1, %0|%0, %1}"
9585 [(set_attr "type" "ssemov")
9586 (set_attr "prefix_extra" "1")
9587 (set_attr "prefix" "maybe_vex")
9588 (set_attr "mode" "TI")])
9589
;; VEX-encoded vmpsadbw on V16QI, modelled as UNSPEC_MPSADBW of operands
;; 1 and 2 plus an immediate accepted by const_0_to_255_operand.
9590 (define_insn "*avx_mpsadbw"
9591 [(set (match_operand:V16QI 0 "register_operand" "=x")
9592 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9593 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9594 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9595 UNSPEC_MPSADBW))]
9596 "TARGET_AVX"
9597 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9598 [(set_attr "type" "sselog1")
9599 (set_attr "prefix" "vex")
9600 (set_attr "prefix_extra" "1")
9601 (set_attr "length_immediate" "1")
9602 (set_attr "mode" "TI")])
9603
;; SSE4.1 mpsadbw on V16QI (UNSPEC_MPSADBW); operand 1 is tied to the
;; destination ("0"), operand 3 is an immediate accepted by
;; const_0_to_255_operand.
9604 (define_insn "sse4_1_mpsadbw"
9605 [(set (match_operand:V16QI 0 "register_operand" "=x")
9606 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9607 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9608 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9609 UNSPEC_MPSADBW))]
9610 "TARGET_SSE4_1"
9611 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9612 [(set_attr "type" "sselog1")
9613 (set_attr "prefix_extra" "1")
9614 (set_attr "length_immediate" "1")
9615 (set_attr "mode" "TI")])
9616
;; VEX-encoded vpackusdw: each V4SI operand is narrowed to V4HI with
;; unsigned saturation (us_truncate) and the two halves are concatenated,
;; operand 1 first and operand 2 second.
9617 (define_insn "*avx_packusdw"
9618 [(set (match_operand:V8HI 0 "register_operand" "=x")
9619 (vec_concat:V8HI
9620 (us_truncate:V4HI
9621 (match_operand:V4SI 1 "register_operand" "x"))
9622 (us_truncate:V4HI
9623 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9624 "TARGET_AVX"
9625 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9626 [(set_attr "type" "sselog")
9627 (set_attr "prefix_extra" "1")
9628 (set_attr "prefix" "vex")
9629 (set_attr "mode" "TI")])
9630
;; SSE4.1 packusdw: each V4SI operand is narrowed to V4HI with unsigned
;; saturation (us_truncate) and the halves are concatenated, operand 1
;; (tied to the destination) first and operand 2 second.
9631 (define_insn "sse4_1_packusdw"
9632 [(set (match_operand:V8HI 0 "register_operand" "=x")
9633 (vec_concat:V8HI
9634 (us_truncate:V4HI
9635 (match_operand:V4SI 1 "register_operand" "0"))
9636 (us_truncate:V4HI
9637 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9638 "TARGET_SSE4_1"
9639 "packusdw\t{%2, %0|%0, %2}"
9640 [(set_attr "type" "sselog")
9641 (set_attr "prefix_extra" "1")
9642 (set_attr "mode" "TI")])
9643
;; VEX-encoded vpblendvb on V16QI, modelled as UNSPEC_BLENDV of three
;; vector operands; operand 3 is an ordinary vector register printed as
;; the fourth instruction operand.
9644 (define_insn "*avx_pblendvb"
9645 [(set (match_operand:V16QI 0 "register_operand" "=x")
9646 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9647 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9648 (match_operand:V16QI 3 "register_operand" "x")]
9649 UNSPEC_BLENDV))]
9650 "TARGET_AVX"
9651 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9652 [(set_attr "type" "ssemov")
9653 (set_attr "prefix_extra" "1")
9654 (set_attr "length_immediate" "1")
9655 (set_attr "prefix" "vex")
9656 (set_attr "mode" "TI")])
9657
;; SSE4.1 pblendvb on V16QI (UNSPEC_BLENDV).  Operands 0-2 use the
;; *_not_xmm0 predicates and operand 3 uses the "Yz" constraint.
;; NOTE(review): presumably this pins the mask operand to xmm0 and keeps
;; the other operands out of it -- confirm against the constraint
;; definitions.
9658 (define_insn "sse4_1_pblendvb"
9659 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9660 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9661 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9662 (match_operand:V16QI 3 "register_operand" "Yz")]
9663 UNSPEC_BLENDV))]
9664 "TARGET_SSE4_1"
9665 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9666 [(set_attr "type" "ssemov")
9667 (set_attr "prefix_extra" "1")
9668 (set_attr "mode" "TI")])
9669
;; VEX-encoded vpblendw on V8HI, expressed as a vec_merge: operand 2 is
;; the first arm and operand 1 the second, with constant operand 3
;; (accepted by const_0_to_255_operand) as the merge mask.
9670 (define_insn "*avx_pblendw"
9671 [(set (match_operand:V8HI 0 "register_operand" "=x")
9672 (vec_merge:V8HI
9673 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9674 (match_operand:V8HI 1 "register_operand" "x")
9675 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9676 "TARGET_AVX"
9677 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9678 [(set_attr "type" "ssemov")
9679 (set_attr "prefix" "vex")
9680 (set_attr "prefix_extra" "1")
9681 (set_attr "length_immediate" "1")
9682 (set_attr "mode" "TI")])
9683
;; SSE4.1 pblendw on V8HI, expressed as a vec_merge of operand 2 (first
;; arm) and operand 1 (second arm, tied to the destination) under
;; constant mask operand 3.
9684 (define_insn "sse4_1_pblendw"
9685 [(set (match_operand:V8HI 0 "register_operand" "=x")
9686 (vec_merge:V8HI
9687 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9688 (match_operand:V8HI 1 "register_operand" "0")
9689 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9690 "TARGET_SSE4_1"
9691 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9692 [(set_attr "type" "ssemov")
9693 (set_attr "prefix_extra" "1")
9694 (set_attr "length_immediate" "1")
9695 (set_attr "mode" "TI")])
9696
;; SSE4.1 phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of a single
;; register-or-memory operand.  One pattern with the "maybe_vex" prefix
;; attribute covers both encodings.
9697 (define_insn "sse4_1_phminposuw"
9698 [(set (match_operand:V8HI 0 "register_operand" "=x")
9699 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9700 UNSPEC_PHMINPOSUW))]
9701 "TARGET_SSE4_1"
9702 "%vphminposuw\t{%1, %0|%0, %1}"
9703 [(set_attr "type" "sselog1")
9704 (set_attr "prefix_extra" "1")
9705 (set_attr "prefix" "maybe_vex")
9706 (set_attr "mode" "TI")])
9707
;; pmovsxbw, register source: sign-extends elements 0-7 of V16QI
;; register operand 1 to V8HI.
9708 (define_insn "sse4_1_extendv8qiv8hi2"
9709 [(set (match_operand:V8HI 0 "register_operand" "=x")
9710 (sign_extend:V8HI
9711 (vec_select:V8QI
9712 (match_operand:V16QI 1 "register_operand" "x")
9713 (parallel [(const_int 0)
9714 (const_int 1)
9715 (const_int 2)
9716 (const_int 3)
9717 (const_int 4)
9718 (const_int 5)
9719 (const_int 6)
9720 (const_int 7)]))))]
9721 "TARGET_SSE4_1"
9722 "%vpmovsxbw\t{%1, %0|%0, %1}"
9723 [(set_attr "type" "ssemov")
9724 (set_attr "prefix_extra" "1")
9725 (set_attr "prefix" "maybe_vex")
9726 (set_attr "mode" "TI")])
9727
;; pmovsxbw, narrow source: operand 1 is a V8QI register or memory
;; operand, written as a vec_duplicate to V16QI from which elements 0-7
;; are selected and sign-extended to V8HI.
9728 (define_insn "*sse4_1_extendv8qiv8hi2"
9729 [(set (match_operand:V8HI 0 "register_operand" "=x")
9730 (sign_extend:V8HI
9731 (vec_select:V8QI
9732 (vec_duplicate:V16QI
9733 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9734 (parallel [(const_int 0)
9735 (const_int 1)
9736 (const_int 2)
9737 (const_int 3)
9738 (const_int 4)
9739 (const_int 5)
9740 (const_int 6)
9741 (const_int 7)]))))]
9742 "TARGET_SSE4_1"
9743 "%vpmovsxbw\t{%1, %0|%0, %1}"
9744 [(set_attr "type" "ssemov")
9745 (set_attr "prefix_extra" "1")
9746 (set_attr "prefix" "maybe_vex")
9747 (set_attr "mode" "TI")])
9748
;; pmovsxbd, register source: sign-extends elements 0-3 of V16QI
;; register operand 1 to V4SI.
9749 (define_insn "sse4_1_extendv4qiv4si2"
9750 [(set (match_operand:V4SI 0 "register_operand" "=x")
9751 (sign_extend:V4SI
9752 (vec_select:V4QI
9753 (match_operand:V16QI 1 "register_operand" "x")
9754 (parallel [(const_int 0)
9755 (const_int 1)
9756 (const_int 2)
9757 (const_int 3)]))))]
9758 "TARGET_SSE4_1"
9759 "%vpmovsxbd\t{%1, %0|%0, %1}"
9760 [(set_attr "type" "ssemov")
9761 (set_attr "prefix_extra" "1")
9762 (set_attr "prefix" "maybe_vex")
9763 (set_attr "mode" "TI")])
9764
;; pmovsxbd, narrow source: operand 1 is a V4QI register or memory
;; operand, written as a vec_duplicate to V16QI from which elements 0-3
;; are selected and sign-extended to V4SI.
9765 (define_insn "*sse4_1_extendv4qiv4si2"
9766 [(set (match_operand:V4SI 0 "register_operand" "=x")
9767 (sign_extend:V4SI
9768 (vec_select:V4QI
9769 (vec_duplicate:V16QI
9770 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9771 (parallel [(const_int 0)
9772 (const_int 1)
9773 (const_int 2)
9774 (const_int 3)]))))]
9775 "TARGET_SSE4_1"
9776 "%vpmovsxbd\t{%1, %0|%0, %1}"
9777 [(set_attr "type" "ssemov")
9778 (set_attr "prefix_extra" "1")
9779 (set_attr "prefix" "maybe_vex")
9780 (set_attr "mode" "TI")])
9781
;; pmovsxbq, register source: sign-extends elements 0-1 of V16QI
;; register operand 1 to V2DI.
9782 (define_insn "sse4_1_extendv2qiv2di2"
9783 [(set (match_operand:V2DI 0 "register_operand" "=x")
9784 (sign_extend:V2DI
9785 (vec_select:V2QI
9786 (match_operand:V16QI 1 "register_operand" "x")
9787 (parallel [(const_int 0)
9788 (const_int 1)]))))]
9789 "TARGET_SSE4_1"
9790 "%vpmovsxbq\t{%1, %0|%0, %1}"
9791 [(set_attr "type" "ssemov")
9792 (set_attr "prefix_extra" "1")
9793 (set_attr "prefix" "maybe_vex")
9794 (set_attr "mode" "TI")])
9795
;; pmovsxbq, narrow source: operand 1 is a V2QI register or memory
;; operand, written as a vec_duplicate to V16QI from which elements 0-1
;; are selected and sign-extended to V2DI.
9796 (define_insn "*sse4_1_extendv2qiv2di2"
9797 [(set (match_operand:V2DI 0 "register_operand" "=x")
9798 (sign_extend:V2DI
9799 (vec_select:V2QI
9800 (vec_duplicate:V16QI
9801 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9802 (parallel [(const_int 0)
9803 (const_int 1)]))))]
9804 "TARGET_SSE4_1"
9805 "%vpmovsxbq\t{%1, %0|%0, %1}"
9806 [(set_attr "type" "ssemov")
9807 (set_attr "prefix_extra" "1")
9808 (set_attr "prefix" "maybe_vex")
9809 (set_attr "mode" "TI")])
9810
;; pmovsxwd, register source: sign-extends elements 0-3 of V8HI register
;; operand 1 to V4SI.
9811 (define_insn "sse4_1_extendv4hiv4si2"
9812 [(set (match_operand:V4SI 0 "register_operand" "=x")
9813 (sign_extend:V4SI
9814 (vec_select:V4HI
9815 (match_operand:V8HI 1 "register_operand" "x")
9816 (parallel [(const_int 0)
9817 (const_int 1)
9818 (const_int 2)
9819 (const_int 3)]))))]
9820 "TARGET_SSE4_1"
9821 "%vpmovsxwd\t{%1, %0|%0, %1}"
9822 [(set_attr "type" "ssemov")
9823 (set_attr "prefix_extra" "1")
9824 (set_attr "prefix" "maybe_vex")
9825 (set_attr "mode" "TI")])
9826
;; pmovsxwd, narrow source: operand 1 is a V4HI register or memory
;; operand, written as a vec_duplicate to V8HI from which elements 0-3
;; are selected and sign-extended to V4SI.
;; The source mode is V4HI so that it supplies all four selected
;; elements, matching the byte and doubleword variants, whose source is
;; exactly as wide as the selection (V8QI, V4QI, V2QI, V2SI).
9827 (define_insn "*sse4_1_extendv4hiv4si2"
9828 [(set (match_operand:V4SI 0 "register_operand" "=x")
9829 (sign_extend:V4SI
9830 (vec_select:V4HI
9831 (vec_duplicate:V8HI
9832 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
9833 (parallel [(const_int 0)
9834 (const_int 1)
9835 (const_int 2)
9836 (const_int 3)]))))]
9837 "TARGET_SSE4_1"
9838 "%vpmovsxwd\t{%1, %0|%0, %1}"
9839 [(set_attr "type" "ssemov")
9840 (set_attr "prefix_extra" "1")
9841 (set_attr "prefix" "maybe_vex")
9842 (set_attr "mode" "TI")])
9843
;; pmovsxwq, register source: sign-extends elements 0-1 of V8HI register
;; operand 1 to V2DI.
9844 (define_insn "sse4_1_extendv2hiv2di2"
9845 [(set (match_operand:V2DI 0 "register_operand" "=x")
9846 (sign_extend:V2DI
9847 (vec_select:V2HI
9848 (match_operand:V8HI 1 "register_operand" "x")
9849 (parallel [(const_int 0)
9850 (const_int 1)]))))]
9851 "TARGET_SSE4_1"
9852 "%vpmovsxwq\t{%1, %0|%0, %1}"
9853 [(set_attr "type" "ssemov")
9854 (set_attr "prefix_extra" "1")
9855 (set_attr "prefix" "maybe_vex")
9856 (set_attr "mode" "TI")])
9857
;; pmovsxwq, register-or-memory source: elements 0-1 of a vec_duplicate
;; of operand 1 are selected and sign-extended to V2DI.
;; NOTE(review): operand 1 is V8HI inside a vec_duplicate:V8HI, whereas
;; the byte and doubleword variants duplicate a narrower source (V2QI,
;; V2SI) -- confirm whether V2HI was intended here.
9858 (define_insn "*sse4_1_extendv2hiv2di2"
9859 [(set (match_operand:V2DI 0 "register_operand" "=x")
9860 (sign_extend:V2DI
9861 (vec_select:V2HI
9862 (vec_duplicate:V8HI
9863 (match_operand:V8HI 1 "nonimmediate_operand" "xm"))
9864 (parallel [(const_int 0)
9865 (const_int 1)]))))]
9866 "TARGET_SSE4_1"
9867 "%vpmovsxwq\t{%1, %0|%0, %1}"
9868 [(set_attr "type" "ssemov")
9869 (set_attr "prefix_extra" "1")
9870 (set_attr "prefix" "maybe_vex")
9871 (set_attr "mode" "TI")])
9872
;; pmovsxdq, register source: sign-extends elements 0-1 of V4SI register
;; operand 1 to V2DI.
9873 (define_insn "sse4_1_extendv2siv2di2"
9874 [(set (match_operand:V2DI 0 "register_operand" "=x")
9875 (sign_extend:V2DI
9876 (vec_select:V2SI
9877 (match_operand:V4SI 1 "register_operand" "x")
9878 (parallel [(const_int 0)
9879 (const_int 1)]))))]
9880 "TARGET_SSE4_1"
9881 "%vpmovsxdq\t{%1, %0|%0, %1}"
9882 [(set_attr "type" "ssemov")
9883 (set_attr "prefix_extra" "1")
9884 (set_attr "prefix" "maybe_vex")
9885 (set_attr "mode" "TI")])
9886
;; pmovsxdq, narrow source: operand 1 is a V2SI register or memory
;; operand, written as a vec_duplicate to V4SI from which elements 0-1
;; are selected and sign-extended to V2DI.
9887 (define_insn "*sse4_1_extendv2siv2di2"
9888 [(set (match_operand:V2DI 0 "register_operand" "=x")
9889 (sign_extend:V2DI
9890 (vec_select:V2SI
9891 (vec_duplicate:V4SI
9892 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
9893 (parallel [(const_int 0)
9894 (const_int 1)]))))]
9895 "TARGET_SSE4_1"
9896 "%vpmovsxdq\t{%1, %0|%0, %1}"
9897 [(set_attr "type" "ssemov")
9898 (set_attr "prefix_extra" "1")
9899 (set_attr "prefix" "maybe_vex")
9900 (set_attr "mode" "TI")])
9901
;; pmovzxbw, register source: zero-extends elements 0-7 of V16QI
;; register operand 1 to V8HI.
9902 (define_insn "sse4_1_zero_extendv8qiv8hi2"
9903 [(set (match_operand:V8HI 0 "register_operand" "=x")
9904 (zero_extend:V8HI
9905 (vec_select:V8QI
9906 (match_operand:V16QI 1 "register_operand" "x")
9907 (parallel [(const_int 0)
9908 (const_int 1)
9909 (const_int 2)
9910 (const_int 3)
9911 (const_int 4)
9912 (const_int 5)
9913 (const_int 6)
9914 (const_int 7)]))))]
9915 "TARGET_SSE4_1"
9916 "%vpmovzxbw\t{%1, %0|%0, %1}"
9917 [(set_attr "type" "ssemov")
9918 (set_attr "prefix_extra" "1")
9919 (set_attr "prefix" "maybe_vex")
9920 (set_attr "mode" "TI")])
9921
;; pmovzxbw, narrow source: operand 1 is a V8QI register or memory
;; operand, written as a vec_duplicate to V16QI from which elements 0-7
;; are selected and zero-extended to V8HI.
9922 (define_insn "*sse4_1_zero_extendv8qiv8hi2"
9923 [(set (match_operand:V8HI 0 "register_operand" "=x")
9924 (zero_extend:V8HI
9925 (vec_select:V8QI
9926 (vec_duplicate:V16QI
9927 (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
9928 (parallel [(const_int 0)
9929 (const_int 1)
9930 (const_int 2)
9931 (const_int 3)
9932 (const_int 4)
9933 (const_int 5)
9934 (const_int 6)
9935 (const_int 7)]))))]
9936 "TARGET_SSE4_1"
9937 "%vpmovzxbw\t{%1, %0|%0, %1}"
9938 [(set_attr "type" "ssemov")
9939 (set_attr "prefix_extra" "1")
9940 (set_attr "prefix" "maybe_vex")
9941 (set_attr "mode" "TI")])
9942
;; Zero-extend the four QImode elements at indices 0-3 of the V16QI
;; register operand 1 into the V4SI register operand 0.
;; Emits %vpmovzxbd; enabled by TARGET_SSE4_1.
9943 (define_insn "sse4_1_zero_extendv4qiv4si2"
9944 [(set (match_operand:V4SI 0 "register_operand" "=x")
9945 (zero_extend:V4SI
9946 (vec_select:V4QI
9947 (match_operand:V16QI 1 "register_operand" "x")
9948 (parallel [(const_int 0)
9949 (const_int 1)
9950 (const_int 2)
9951 (const_int 3)]))))]
9952 "TARGET_SSE4_1"
9953 "%vpmovzxbd\t{%1, %0|%0, %1}"
9954 [(set_attr "type" "ssemov")
9955 (set_attr "prefix_extra" "1")
9956 (set_attr "prefix" "maybe_vex")
9957 (set_attr "mode" "TI")])
9958
;; Zero-extend elements 0-3 of operand 1 into the V4SI register operand 0.
;; Operand 1 is a V4QI register or memory operand ("xm") wrapped in a
;; vec_duplicate:V16QI before the four-element select.
;; Emits %vpmovzxbd; enabled by TARGET_SSE4_1.
9959 (define_insn "*sse4_1_zero_extendv4qiv4si2"
9960 [(set (match_operand:V4SI 0 "register_operand" "=x")
9961 (zero_extend:V4SI
9962 (vec_select:V4QI
9963 (vec_duplicate:V16QI
9964 (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
9965 (parallel [(const_int 0)
9966 (const_int 1)
9967 (const_int 2)
9968 (const_int 3)]))))]
9969 "TARGET_SSE4_1"
9970 "%vpmovzxbd\t{%1, %0|%0, %1}"
9971 [(set_attr "type" "ssemov")
9972 (set_attr "prefix_extra" "1")
9973 (set_attr "prefix" "maybe_vex")
9974 (set_attr "mode" "TI")])
9975
;; Zero-extend the two QImode elements at indices 0 and 1 of the V16QI
;; register operand 1 into the V2DI register operand 0.
;; Emits %vpmovzxbq; enabled by TARGET_SSE4_1.
9976 (define_insn "sse4_1_zero_extendv2qiv2di2"
9977 [(set (match_operand:V2DI 0 "register_operand" "=x")
9978 (zero_extend:V2DI
9979 (vec_select:V2QI
9980 (match_operand:V16QI 1 "register_operand" "x")
9981 (parallel [(const_int 0)
9982 (const_int 1)]))))]
9983 "TARGET_SSE4_1"
9984 "%vpmovzxbq\t{%1, %0|%0, %1}"
9985 [(set_attr "type" "ssemov")
9986 (set_attr "prefix_extra" "1")
9987 (set_attr "prefix" "maybe_vex")
9988 (set_attr "mode" "TI")])
9989
;; Zero-extend elements 0 and 1 of operand 1 into the V2DI register
;; operand 0.  Operand 1 is a V2QI register or memory operand ("xm")
;; wrapped in a vec_duplicate:V16QI before the two-element select.
;; Emits %vpmovzxbq; enabled by TARGET_SSE4_1.
9990 (define_insn "*sse4_1_zero_extendv2qiv2di2"
9991 [(set (match_operand:V2DI 0 "register_operand" "=x")
9992 (zero_extend:V2DI
9993 (vec_select:V2QI
9994 (vec_duplicate:V16QI
9995 (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
9996 (parallel [(const_int 0)
9997 (const_int 1)]))))]
9998 "TARGET_SSE4_1"
9999 "%vpmovzxbq\t{%1, %0|%0, %1}"
10000 [(set_attr "type" "ssemov")
10001 (set_attr "prefix_extra" "1")
10002 (set_attr "prefix" "maybe_vex")
10003 (set_attr "mode" "TI")])
10004
;; Zero-extend the four HImode elements at indices 0-3 of the V8HI
;; register operand 1 into the V4SI register operand 0.
;; Emits %vpmovzxwd; enabled by TARGET_SSE4_1.
10005 (define_insn "sse4_1_zero_extendv4hiv4si2"
10006 [(set (match_operand:V4SI 0 "register_operand" "=x")
10007 (zero_extend:V4SI
10008 (vec_select:V4HI
10009 (match_operand:V8HI 1 "register_operand" "x")
10010 (parallel [(const_int 0)
10011 (const_int 1)
10012 (const_int 2)
10013 (const_int 3)]))))]
10014 "TARGET_SSE4_1"
10015 "%vpmovzxwd\t{%1, %0|%0, %1}"
10016 [(set_attr "type" "ssemov")
10017 (set_attr "prefix_extra" "1")
10018 (set_attr "prefix" "maybe_vex")
10019 (set_attr "mode" "TI")])
10020
;; Zero-extend elements 0-3 of operand 1 into the V4SI register operand 0.
;; Operand 1 is a V4HI register or memory operand ("xm") wrapped in a
;; vec_duplicate:V8HI before the four-element select.
;; Emits %vpmovzxwd; enabled by TARGET_SSE4_1.
10021 (define_insn "*sse4_1_zero_extendv4hiv4si2"
10022 [(set (match_operand:V4SI 0 "register_operand" "=x")
10023 (zero_extend:V4SI
10024 (vec_select:V4HI
10025 (vec_duplicate:V8HI
10026 (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
10027 (parallel [(const_int 0)
10028 (const_int 1)
10029 (const_int 2)
10030 (const_int 3)]))))]
10031 "TARGET_SSE4_1"
10032 "%vpmovzxwd\t{%1, %0|%0, %1}"
10033 [(set_attr "type" "ssemov")
10034 (set_attr "prefix_extra" "1")
10035 (set_attr "prefix" "maybe_vex")
10036 (set_attr "mode" "TI")])
10037
;; Zero-extend the two HImode elements at indices 0 and 1 of the V8HI
;; register operand 1 into the V2DI register operand 0.
;; Emits %vpmovzxwq; enabled by TARGET_SSE4_1.
10038 (define_insn "sse4_1_zero_extendv2hiv2di2"
10039 [(set (match_operand:V2DI 0 "register_operand" "=x")
10040 (zero_extend:V2DI
10041 (vec_select:V2HI
10042 (match_operand:V8HI 1 "register_operand" "x")
10043 (parallel [(const_int 0)
10044 (const_int 1)]))))]
10045 "TARGET_SSE4_1"
10046 "%vpmovzxwq\t{%1, %0|%0, %1}"
10047 [(set_attr "type" "ssemov")
10048 (set_attr "prefix_extra" "1")
10049 (set_attr "prefix" "maybe_vex")
10050 (set_attr "mode" "TI")])
10051
;; Zero-extend elements 0 and 1 of operand 1 into the V2DI register
;; operand 0.  Operand 1 is a V2HI register or memory operand ("xm")
;; wrapped in a vec_duplicate:V8HI before the two-element select.
;; Emits %vpmovzxwq; enabled by TARGET_SSE4_1.
10052 (define_insn "*sse4_1_zero_extendv2hiv2di2"
10053 [(set (match_operand:V2DI 0 "register_operand" "=x")
10054 (zero_extend:V2DI
10055 (vec_select:V2HI
10056 (vec_duplicate:V8HI
10057 (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
10058 (parallel [(const_int 0)
10059 (const_int 1)]))))]
10060 "TARGET_SSE4_1"
10061 "%vpmovzxwq\t{%1, %0|%0, %1}"
10062 [(set_attr "type" "ssemov")
10063 (set_attr "prefix_extra" "1")
10064 (set_attr "prefix" "maybe_vex")
10065 (set_attr "mode" "TI")])
10066
;; Zero-extend the two SImode elements at indices 0 and 1 of the V4SI
;; register operand 1 into the V2DI register operand 0.
;; Emits %vpmovzxdq; enabled by TARGET_SSE4_1.
10067 (define_insn "sse4_1_zero_extendv2siv2di2"
10068 [(set (match_operand:V2DI 0 "register_operand" "=x")
10069 (zero_extend:V2DI
10070 (vec_select:V2SI
10071 (match_operand:V4SI 1 "register_operand" "x")
10072 (parallel [(const_int 0)
10073 (const_int 1)]))))]
10074 "TARGET_SSE4_1"
10075 "%vpmovzxdq\t{%1, %0|%0, %1}"
10076 [(set_attr "type" "ssemov")
10077 (set_attr "prefix_extra" "1")
10078 (set_attr "prefix" "maybe_vex")
10079 (set_attr "mode" "TI")])
10080
;; Zero-extend elements 0 and 1 of operand 1 into the V2DI register
;; operand 0.  Operand 1 is a V2SI register or memory operand ("xm")
;; wrapped in a vec_duplicate:V4SI before the two-element select.
;; Emits %vpmovzxdq; enabled by TARGET_SSE4_1.
10081 (define_insn "*sse4_1_zero_extendv2siv2di2"
10082 [(set (match_operand:V2DI 0 "register_operand" "=x")
10083 (zero_extend:V2DI
10084 (vec_select:V2SI
10085 (vec_duplicate:V4SI
10086 (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
10087 (parallel [(const_int 0)
10088 (const_int 1)]))))]
10089 "TARGET_SSE4_1"
10090 "%vpmovzxdq\t{%1, %0|%0, %1}"
10091 [(set_attr "type" "ssemov")
10092 (set_attr "prefix_extra" "1")
10093 (set_attr "prefix" "maybe_vex")
10094 (set_attr "mode" "TI")])
10095
10096 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
10097 ;; setting FLAGS_REG. But they are not really compare instructions.
;; Sets FLAGS_REG (mode CC) to an UNSPEC_VTESTP of register operand 0 and
;; register-or-memory operand 1, both in an AVXMODEF2P mode.
;; Emits vtestp<avxmodesuffixf2c>; enabled by TARGET_AVX.
10098 (define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
10099 [(set (reg:CC FLAGS_REG)
10100 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
10101 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
10102 UNSPEC_VTESTP))]
10103 "TARGET_AVX"
10104 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
10105 [(set_attr "type" "ssecomi")
10106 (set_attr "prefix_extra" "1")
10107 (set_attr "prefix" "vex")
10108 (set_attr "mode" "<MODE>")])
10109
10110 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
10111 ;; But it is not really a compare instruction.
;; Sets FLAGS_REG (mode CC) to an UNSPEC_PTEST of V4DI register operand 0
;; and V4DI register-or-memory operand 1.
;; Emits vptest with the "vex" prefix attribute; enabled by TARGET_AVX.
10112 (define_insn "avx_ptest256"
10113 [(set (reg:CC FLAGS_REG)
10114 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
10115 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
10116 UNSPEC_PTEST))]
10117 "TARGET_AVX"
10118 "vptest\t{%1, %0|%0, %1}"
10119 [(set_attr "type" "ssecomi")
10120 (set_attr "prefix_extra" "1")
10121 (set_attr "prefix" "vex")
10122 (set_attr "mode" "OI")])
10123
;; Sets FLAGS_REG (mode CC) to an UNSPEC_PTEST of V2DI register operand 0
;; and V2DI register-or-memory operand 1.
;; Emits %vptest; enabled by TARGET_SSE4_1.
10124 (define_insn "sse4_1_ptest"
10125 [(set (reg:CC FLAGS_REG)
10126 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
10127 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
10128 UNSPEC_PTEST))]
10129 "TARGET_SSE4_1"
10130 "%vptest\t{%1, %0|%0, %1}"
10131 [(set_attr "type" "ssecomi")
10132 (set_attr "prefix_extra" "1")
10133 (set_attr "prefix" "maybe_vex")
10134 (set_attr "mode" "TI")])
10135
;; Sets register operand 0 to an UNSPEC_ROUND of register-or-memory
;; operand 1 and the SImode immediate operand 2, which must satisfy
;; const_0_to_15_operand.  Modes come from AVX256MODEF2P.
;; Emits vroundp<avxmodesuffixf2c>; enabled by TARGET_AVX.
10136 (define_insn "avx_roundp<avxmodesuffixf2c>256"
10137 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
10138 (unspec:AVX256MODEF2P
10139 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
10140 (match_operand:SI 2 "const_0_to_15_operand" "n")]
10141 UNSPEC_ROUND))]
10142 "TARGET_AVX"
10143 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
10144 [(set_attr "type" "ssecvt")
10145 (set_attr "prefix_extra" "1")
10146 (set_attr "length_immediate" "1")
10147 (set_attr "prefix" "vex")
10148 (set_attr "mode" "<MODE>")])
10149
;; Sets register operand 0 to an UNSPEC_ROUND of register-or-memory
;; operand 1 and the SImode immediate operand 2, which must satisfy
;; const_0_to_15_operand.  Modes come from SSEMODEF2P.
;; Emits %vroundp<ssemodesuffixf2c>; enabled by TARGET_ROUND.
10150 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
10151 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
10152 (unspec:SSEMODEF2P
10153 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
10154 (match_operand:SI 2 "const_0_to_15_operand" "n")]
10155 UNSPEC_ROUND))]
10156 "TARGET_ROUND"
10157 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
10158 [(set_attr "type" "ssecvt")
10159 (set_attr "prefix_data16" "1")
10160 (set_attr "prefix_extra" "1")
10161 (set_attr "length_immediate" "1")
10162 (set_attr "prefix" "maybe_vex")
10163 (set_attr "mode" "<MODE>")])
10164
;; AVX form of the scalar round: operand 0 is a vec_merge, with mask
;; (const_int 1), of the UNSPEC_ROUND of register operand 2 (immediate in
;; operand 3) and register operand 1.  Operand 1 is an independent
;; register ("x") and is printed in the template as a separate source.
;; Emits vrounds<ssemodesuffixf2c>; enabled by TARGET_AVX.
10165 (define_insn "*avx_rounds<ssemodesuffixf2c>"
10166 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
10167 (vec_merge:SSEMODEF2P
10168 (unspec:SSEMODEF2P
10169 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
10170 (match_operand:SI 3 "const_0_to_15_operand" "n")]
10171 UNSPEC_ROUND)
10172 (match_operand:SSEMODEF2P 1 "register_operand" "x")
10173 (const_int 1)))]
10174 "TARGET_AVX"
10175 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10176 [(set_attr "type" "ssecvt")
10177 (set_attr "prefix_extra" "1")
10178 (set_attr "length_immediate" "1")
10179 (set_attr "prefix" "vex")
10180 (set_attr "mode" "<MODE>")])
10181
;; Non-VEX form of the scalar round: operand 0 is a vec_merge, with mask
;; (const_int 1), of the UNSPEC_ROUND of register operand 2 (immediate in
;; operand 3) and operand 1.  Operand 1 uses the matching constraint "0",
;; so it shares the destination register and is not printed in the template.
;; Emits rounds<ssemodesuffixf2c>; enabled by TARGET_ROUND.
10182 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
10183 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
10184 (vec_merge:SSEMODEF2P
10185 (unspec:SSEMODEF2P
10186 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
10187 (match_operand:SI 3 "const_0_to_15_operand" "n")]
10188 UNSPEC_ROUND)
10189 (match_operand:SSEMODEF2P 1 "register_operand" "0")
10190 (const_int 1)))]
10191 "TARGET_ROUND"
10192 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
10193 [(set_attr "type" "ssecvt")
10194 (set_attr "prefix_data16" "1")
10195 (set_attr "prefix_extra" "1")
10196 (set_attr "length_immediate" "1")
10197 (set_attr "mode" "<MODE>")])
10198
10199 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10200 ;;
10201 ;; Intel SSE4.2 string/text processing instructions
10202 ;;
10203 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10204
;; Combined pcmpestr pattern with three results, all computed as
;; UNSPEC_PCMPESTR of operands 2-6: an SImode result in operand 0
;; (constraint "c"), a V16QI result in operand 1 (constraint "Yz"), and
;; FLAGS_REG.  It only matches while can_create_pseudo_p () holds, has
;; "#" as its output template, and is always split ("&& 1").
;;
;; The split code looks for REG_UNUSED notes on curr_insn for operand 0,
;; operand 1 and FLAGS_REG, then emits:
;;   - sse4_2_pcmpestri if operand 0 is used;
;;   - sse4_2_pcmpestrm if operand 1 is used (both may be emitted);
;;   - sse4_2_pcmpestr_cconly, with NULL for its first two arguments, only
;;     when the flags are used and neither register result is.
;; If none of the three results is used, nothing is emitted.
10205 (define_insn_and_split "sse4_2_pcmpestr"
10206 [(set (match_operand:SI 0 "register_operand" "=c,c")
10207 (unspec:SI
10208 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10209 (match_operand:SI 3 "register_operand" "a,a")
10210 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
10211 (match_operand:SI 5 "register_operand" "d,d")
10212 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
10213 UNSPEC_PCMPESTR))
10214 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10215 (unspec:V16QI
10216 [(match_dup 2)
10217 (match_dup 3)
10218 (match_dup 4)
10219 (match_dup 5)
10220 (match_dup 6)]
10221 UNSPEC_PCMPESTR))
10222 (set (reg:CC FLAGS_REG)
10223 (unspec:CC
10224 [(match_dup 2)
10225 (match_dup 3)
10226 (match_dup 4)
10227 (match_dup 5)
10228 (match_dup 6)]
10229 UNSPEC_PCMPESTR))]
10230 "TARGET_SSE4_2
10231 && can_create_pseudo_p ()"
10232 "#"
10233 "&& 1"
10234 [(const_int 0)]
10235 {
10236 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10237 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10238 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10239
10240 if (ecx)
10241 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
10242 operands[3], operands[4],
10243 operands[5], operands[6]));
10244 if (xmm0)
10245 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
10246 operands[3], operands[4],
10247 operands[5], operands[6]));
10248 if (flags && !(ecx || xmm0))
10249 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
10250 operands[2], operands[3],
10251 operands[4], operands[5],
10252 operands[6]));
10253 DONE;
10254 }
10255 [(set_attr "type" "sselog")
10256 (set_attr "prefix_data16" "1")
10257 (set_attr "prefix_extra" "1")
10258 (set_attr "length_immediate" "1")
10259 (set_attr "memory" "none,load")
10260 (set_attr "mode" "TI")])
10261
;; Index form: sets SImode operand 0 (constraint "c") and FLAGS_REG to
;; UNSPEC_PCMPESTR of operands 1-5.  Operands 2 and 4 are SImode registers
;; constrained to "a" and "d"; operand 3 may be a register or memory.
;; Only operands 1, 3 and 5 appear in the output template.
;; Emits %vpcmpestri; enabled by TARGET_SSE4_2.
10262 (define_insn "sse4_2_pcmpestri"
10263 [(set (match_operand:SI 0 "register_operand" "=c,c")
10264 (unspec:SI
10265 [(match_operand:V16QI 1 "register_operand" "x,x")
10266 (match_operand:SI 2 "register_operand" "a,a")
10267 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10268 (match_operand:SI 4 "register_operand" "d,d")
10269 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10270 UNSPEC_PCMPESTR))
10271 (set (reg:CC FLAGS_REG)
10272 (unspec:CC
10273 [(match_dup 1)
10274 (match_dup 2)
10275 (match_dup 3)
10276 (match_dup 4)
10277 (match_dup 5)]
10278 UNSPEC_PCMPESTR))]
10279 "TARGET_SSE4_2"
10280 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10281 [(set_attr "type" "sselog")
10282 (set_attr "prefix_data16" "1")
10283 (set_attr "prefix_extra" "1")
10284 (set_attr "prefix" "maybe_vex")
10285 (set_attr "length_immediate" "1")
10286 (set_attr "memory" "none,load")
10287 (set_attr "mode" "TI")])
10288
;; Mask form: sets V16QI operand 0 (constraint "Yz") and FLAGS_REG to
;; UNSPEC_PCMPESTR of operands 1-5.  Operands 2 and 4 are SImode registers
;; constrained to "a" and "d"; operand 3 may be a register or memory.
;; Only operands 1, 3 and 5 appear in the output template.
;; Emits %vpcmpestrm; enabled by TARGET_SSE4_2.
10289 (define_insn "sse4_2_pcmpestrm"
10290 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10291 (unspec:V16QI
10292 [(match_operand:V16QI 1 "register_operand" "x,x")
10293 (match_operand:SI 2 "register_operand" "a,a")
10294 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10295 (match_operand:SI 4 "register_operand" "d,d")
10296 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10297 UNSPEC_PCMPESTR))
10298 (set (reg:CC FLAGS_REG)
10299 (unspec:CC
10300 [(match_dup 1)
10301 (match_dup 2)
10302 (match_dup 3)
10303 (match_dup 4)
10304 (match_dup 5)]
10305 UNSPEC_PCMPESTR))]
10306 "TARGET_SSE4_2"
10307 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10308 [(set_attr "type" "sselog")
10309 (set_attr "prefix_data16" "1")
10310 (set_attr "prefix_extra" "1")
10311 (set_attr "length_immediate" "1")
10312 (set_attr "prefix" "maybe_vex")
10313 (set_attr "memory" "none,load")
10314 (set_attr "mode" "TI")])
10315
;; Flags-only form: sets FLAGS_REG to UNSPEC_PCMPESTR of operands 2-6 and
;; clobbers two scratches (operand 0, V16QI; operand 1, SI).
;; Four alternatives:
;;   0/1: V16QI scratch in "Yz", SI scratch "X"; emits %vpcmpestrm
;;        (operand 4 in a register / in memory);
;;   2/3: V16QI scratch "X", SI scratch in "c"; emits %vpcmpestri
;;        (operand 4 in a register / in memory).
;; Enabled by TARGET_SSE4_2.
10316 (define_insn "sse4_2_pcmpestr_cconly"
10317 [(set (reg:CC FLAGS_REG)
10318 (unspec:CC
10319 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10320 (match_operand:SI 3 "register_operand" "a,a,a,a")
10321 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10322 (match_operand:SI 5 "register_operand" "d,d,d,d")
10323 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10324 UNSPEC_PCMPESTR))
10325 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10326 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10327 "TARGET_SSE4_2"
10328 "@
10329 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10330 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10331 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10332 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10333 [(set_attr "type" "sselog")
10334 (set_attr "prefix_data16" "1")
10335 (set_attr "prefix_extra" "1")
10336 (set_attr "length_immediate" "1")
10337 (set_attr "memory" "none,load,none,load")
10338 (set_attr "prefix" "maybe_vex")
10339 (set_attr "mode" "TI")])
10340
;; Combined pcmpistr pattern with three results, all computed as
;; UNSPEC_PCMPISTR of operands 2-4: an SImode result in operand 0
;; (constraint "c"), a V16QI result in operand 1 (constraint "Yz"), and
;; FLAGS_REG.  It only matches while can_create_pseudo_p () holds, has
;; "#" as its output template, and is always split ("&& 1").
;;
;; The split code looks for REG_UNUSED notes on curr_insn for operand 0,
;; operand 1 and FLAGS_REG, then emits:
;;   - sse4_2_pcmpistri if operand 0 is used;
;;   - sse4_2_pcmpistrm if operand 1 is used (both may be emitted);
;;   - sse4_2_pcmpistr_cconly, with NULL for its first two arguments, only
;;     when the flags are used and neither register result is.
;; If none of the three results is used, nothing is emitted.
10341 (define_insn_and_split "sse4_2_pcmpistr"
10342 [(set (match_operand:SI 0 "register_operand" "=c,c")
10343 (unspec:SI
10344 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10345 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10346 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10347 UNSPEC_PCMPISTR))
10348 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10349 (unspec:V16QI
10350 [(match_dup 2)
10351 (match_dup 3)
10352 (match_dup 4)]
10353 UNSPEC_PCMPISTR))
10354 (set (reg:CC FLAGS_REG)
10355 (unspec:CC
10356 [(match_dup 2)
10357 (match_dup 3)
10358 (match_dup 4)]
10359 UNSPEC_PCMPISTR))]
10360 "TARGET_SSE4_2
10361 && can_create_pseudo_p ()"
10362 "#"
10363 "&& 1"
10364 [(const_int 0)]
10365 {
10366 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10367 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10368 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10369
10370 if (ecx)
10371 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10372 operands[3], operands[4]));
10373 if (xmm0)
10374 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10375 operands[3], operands[4]));
10376 if (flags && !(ecx || xmm0))
10377 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10378 operands[2], operands[3],
10379 operands[4]));
10380 DONE;
10381 }
10382 [(set_attr "type" "sselog")
10383 (set_attr "prefix_data16" "1")
10384 (set_attr "prefix_extra" "1")
10385 (set_attr "length_immediate" "1")
10386 (set_attr "memory" "none,load")
10387 (set_attr "mode" "TI")])
10388
;; Index form: sets SImode operand 0 (constraint "c") and FLAGS_REG to
;; UNSPEC_PCMPISTR of V16QI register operand 1, V16QI register-or-memory
;; operand 2 and the immediate operand 3 (const_0_to_255_operand).
;; Emits %vpcmpistri; enabled by TARGET_SSE4_2.
10389 (define_insn "sse4_2_pcmpistri"
10390 [(set (match_operand:SI 0 "register_operand" "=c,c")
10391 (unspec:SI
10392 [(match_operand:V16QI 1 "register_operand" "x,x")
10393 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10394 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10395 UNSPEC_PCMPISTR))
10396 (set (reg:CC FLAGS_REG)
10397 (unspec:CC
10398 [(match_dup 1)
10399 (match_dup 2)
10400 (match_dup 3)]
10401 UNSPEC_PCMPISTR))]
10402 "TARGET_SSE4_2"
10403 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10404 [(set_attr "type" "sselog")
10405 (set_attr "prefix_data16" "1")
10406 (set_attr "prefix_extra" "1")
10407 (set_attr "length_immediate" "1")
10408 (set_attr "prefix" "maybe_vex")
10409 (set_attr "memory" "none,load")
10410 (set_attr "mode" "TI")])
10411
;; Mask form: sets V16QI operand 0 (constraint "Yz") and FLAGS_REG to
;; UNSPEC_PCMPISTR of V16QI register operand 1, V16QI register-or-memory
;; operand 2 and the immediate operand 3 (const_0_to_255_operand).
;; Emits %vpcmpistrm; enabled by TARGET_SSE4_2.
10412 (define_insn "sse4_2_pcmpistrm"
10413 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10414 (unspec:V16QI
10415 [(match_operand:V16QI 1 "register_operand" "x,x")
10416 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10417 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10418 UNSPEC_PCMPISTR))
10419 (set (reg:CC FLAGS_REG)
10420 (unspec:CC
10421 [(match_dup 1)
10422 (match_dup 2)
10423 (match_dup 3)]
10424 UNSPEC_PCMPISTR))]
10425 "TARGET_SSE4_2"
10426 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10427 [(set_attr "type" "sselog")
10428 (set_attr "prefix_data16" "1")
10429 (set_attr "prefix_extra" "1")
10430 (set_attr "length_immediate" "1")
10431 (set_attr "prefix" "maybe_vex")
10432 (set_attr "memory" "none,load")
10433 (set_attr "mode" "TI")])
10434
;; Flags-only form: sets FLAGS_REG to UNSPEC_PCMPISTR of operands 2-4 and
;; clobbers two scratches (operand 0, V16QI; operand 1, SI).
;; Four alternatives:
;;   0/1: V16QI scratch in "Yz", SI scratch "X"; emits %vpcmpistrm
;;        (operand 3 in a register / in memory);
;;   2/3: V16QI scratch "X", SI scratch in "c"; emits %vpcmpistri
;;        (operand 3 in a register / in memory).
;; Enabled by TARGET_SSE4_2.
10435 (define_insn "sse4_2_pcmpistr_cconly"
10436 [(set (reg:CC FLAGS_REG)
10437 (unspec:CC
10438 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10439 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10440 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10441 UNSPEC_PCMPISTR))
10442 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10443 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10444 "TARGET_SSE4_2"
10445 "@
10446 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10447 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10448 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10449 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10450 [(set_attr "type" "sselog")
10451 (set_attr "prefix_data16" "1")
10452 (set_attr "prefix_extra" "1")
10453 (set_attr "length_immediate" "1")
10454 (set_attr "memory" "none,load,none,load")
10455 (set_attr "prefix" "maybe_vex")
10456 (set_attr "mode" "TI")])
10457
10458 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10459 ;;
10460 ;; XOP instructions
10461 ;;
10462 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10463
10464 ;; XOP parallel integer multiply/add instructions.
10465 ;; Note the instruction does not allow the value being added to be a memory
10466 ;; operand. However, pretending via the nonimmediate_operand predicate
10467 ;; that it does, and splitting it later, allows the following to be recognized:
10468 ;; a[i] = b[i] * c[i] + d[i];
;; V8HI multiply-add: operand 0 = (operand 1 * operand 2) + operand 3,
;; using a non-saturating plus.  Operand 3 must be a register.  The two
;; alternatives differ in which multiplicand may be memory; the second
;; alternative's template swaps %1 and %2.
;; NOTE(review): validity is also gated by ix86_fma4_valid_op_p (..., 2,
;; true), whose argument semantics are not visible in this part of the file.
10469 (define_insn "xop_pmacsww"
10470 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10471 (plus:V8HI
10472 (mult:V8HI
10473 (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10474 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x"))
10475 (match_operand:V8HI 3 "register_operand" "x,x")))]
10476 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)"
10477 "@
10478 vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
10479 vpmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10480 [(set_attr "type" "ssemuladd")
10481 (set_attr "mode" "TI")])
10482
10483 ;; Split pmacsww with two memory operands into a load and the pmacsww.
;; Matches the V8HI multiply-add shape with all three sources allowed to
;; be register or memory.  It applies only when ix86_fma4_valid_op_p
;; rejects the operands with fifth argument 1 but accepts them with 2,
;; and operand 0 is not mentioned in operands 1, 2 or 3.
;; The replacement calls ix86_expand_fma4_multiple_memory (operands, 4,
;; V8HImode) and then emits xop_pmacsww on operands 0-3.
10484 (define_split
10485 [(set (match_operand:V8HI 0 "register_operand" "")
10486 (plus:V8HI
10487 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
10488 (match_operand:V8HI 2 "nonimmediate_operand" ""))
10489 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
10490 "TARGET_XOP
10491 && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)
10492 && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)
10493 && !reg_mentioned_p (operands[0], operands[1])
10494 && !reg_mentioned_p (operands[0], operands[2])
10495 && !reg_mentioned_p (operands[0], operands[3])"
10496 [(const_int 0)]
10497 {
10498 ix86_expand_fma4_multiple_memory (operands, 4, V8HImode);
10499 emit_insn (gen_xop_pmacsww (operands[0], operands[1], operands[2],
10500 operands[3]));
10501 DONE;
10502 })
10503
;; V8HI multiply-add with saturating accumulate: operand 0 =
;; ss_plus (operand 1 * operand 2, operand 3).  Operand 3 must be a
;; register.  The two alternatives differ in which multiplicand may be
;; memory; the second alternative's template swaps %1 and %2.
;; Emits vpmacssww; gated by TARGET_XOP and ix86_fma4_valid_op_p.
10504 (define_insn "xop_pmacssww"
10505 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10506 (ss_plus:V8HI
10507 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10508 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x"))
10509 (match_operand:V8HI 3 "register_operand" "x,x")))]
10510 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10511 "@
10512 vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
10513 vpmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10514 [(set_attr "type" "ssemuladd")
10515 (set_attr "mode" "TI")])
10516
10517 ;; Note the instruction does not allow the value being added to be a memory
10518 ;; operand. However, pretending via the nonimmediate_operand predicate
10519 ;; that it does, and splitting it later, allows the following to be recognized:
10520 ;; a[i] = b[i] * c[i] + d[i];
;; V4SI multiply-add: operand 0 = (operand 1 * operand 2) + operand 3,
;; using a non-saturating plus.  Operand 3 must be a register.  The two
;; alternatives differ in which multiplicand may be memory; the second
;; alternative's template swaps %1 and %2.
;; NOTE(review): validity is also gated by ix86_fma4_valid_op_p (..., 2,
;; true), whose argument semantics are not visible in this part of the file.
10521 (define_insn "xop_pmacsdd"
10522 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10523 (plus:V4SI
10524 (mult:V4SI
10525 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10526 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x"))
10527 (match_operand:V4SI 3 "register_operand" "x,x")))]
10528 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)"
10529 "@
10530 vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10531 vpmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10532 [(set_attr "type" "ssemuladd")
10533 (set_attr "mode" "TI")])
10534
10535 ;; Split pmacsdd with two memory operands into a load and the pmacsdd.
;; Matches the V4SI multiply-add shape with all three sources allowed to
;; be register or memory.  It applies only when ix86_fma4_valid_op_p
;; rejects the operands with fifth argument 1 but accepts them with 2,
;; and operand 0 is not mentioned in operands 1, 2 or 3.
;; The replacement calls ix86_expand_fma4_multiple_memory (operands, 4,
;; V4SImode) and then emits xop_pmacsdd on operands 0-3.
10536 (define_split
10537 [(set (match_operand:V4SI 0 "register_operand" "")
10538 (plus:V4SI
10539 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
10540 (match_operand:V4SI 2 "nonimmediate_operand" ""))
10541 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
10542 "TARGET_XOP
10543 && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)
10544 && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)
10545 && !reg_mentioned_p (operands[0], operands[1])
10546 && !reg_mentioned_p (operands[0], operands[2])
10547 && !reg_mentioned_p (operands[0], operands[3])"
10548 [(const_int 0)]
10549 {
10550 ix86_expand_fma4_multiple_memory (operands, 4, V4SImode);
10551 emit_insn (gen_xop_pmacsdd (operands[0], operands[1], operands[2],
10552 operands[3]));
10553 DONE;
10554 })
10555
;; V4SI multiply-add with saturating accumulate: operand 0 =
;; ss_plus (operand 1 * operand 2, operand 3).  Operand 3 must be a
;; register.  The two alternatives differ in which multiplicand may be
;; memory; the second alternative's template swaps %1 and %2.
;; Emits vpmacssdd; gated by TARGET_XOP and ix86_fma4_valid_op_p.
10556 (define_insn "xop_pmacssdd"
10557 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10558 (ss_plus:V4SI
10559 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10560 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x"))
10561 (match_operand:V4SI 3 "register_operand" "x,x")))]
10562 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10563 "@
10564 vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10565 vpmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10566 [(set_attr "type" "ssemuladd")
10567 (set_attr "mode" "TI")])
10568
;; Saturating (ss_plus) accumulate into V2DI register operand 3 of the
;; V2DI product of elements 1 and 3 of V4SI operands 1 and 2.
;; Both multiplicands are sign-extended to V2DI before the multiply, so
;; the operands of mult:V2DI have the same mode as its result; the
;; second multiplicand previously lacked the sign_extend wrapper.
;; The two alternatives differ in which multiplicand may be memory; the
;; second alternative's template swaps %1 and %2.
;; Emits vpmacssdql; gated by TARGET_XOP and ix86_fma4_valid_op_p.
10569 (define_insn "xop_pmacssdql"
10570 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10571 (ss_plus:V2DI
10572 (mult:V2DI
10573 (sign_extend:V2DI
10574 (vec_select:V2SI
10575 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10576 (parallel [(const_int 1)
10577 (const_int 3)])))
10578 (sign_extend:V2DI (vec_select:V2SI
10579 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10580 (parallel [(const_int 1)
10581 (const_int 3)]))))
10582 (match_operand:V2DI 3 "register_operand" "x,x")))]
10583 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10584 "@
10585 vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10586 vpmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10587 [(set_attr "type" "ssemuladd")
10588 (set_attr "mode" "TI")])
10589
;; Saturating (ss_plus) accumulate into V2DI register operand 3 of the
;; V2DI product of the sign-extended elements 0 and 2 of V4SI operands 1
;; and 2.  The two alternatives differ in which multiplicand may be
;; memory; the second alternative's template swaps %1 and %2.
;; Emits vpmacssdqh; gated by TARGET_XOP and ix86_fma4_valid_op_p.
10590 (define_insn "xop_pmacssdqh"
10591 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10592 (ss_plus:V2DI
10593 (mult:V2DI
10594 (sign_extend:V2DI
10595 (vec_select:V2SI
10596 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10597 (parallel [(const_int 0)
10598 (const_int 2)])))
10599 (sign_extend:V2DI
10600 (vec_select:V2SI
10601 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10602 (parallel [(const_int 0)
10603 (const_int 2)]))))
10604 (match_operand:V2DI 3 "register_operand" "x,x")))]
10605 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10606 "@
10607 vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10608 vpmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10609 [(set_attr "type" "ssemuladd")
10610 (set_attr "mode" "TI")])
10611
;; Non-saturating accumulate into V2DI register operand 3 of the V2DI
;; product of the sign-extended elements 1 and 3 of V4SI operands 1 and 2.
;; The two alternatives differ in which multiplicand may be memory; the
;; second alternative's template swaps %1 and %2.
;; Emits vpmacsdql; gated by TARGET_XOP and ix86_fma4_valid_op_p.
10612 (define_insn "xop_pmacsdql"
10613 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10614 (plus:V2DI
10615 (mult:V2DI
10616 (sign_extend:V2DI
10617 (vec_select:V2SI
10618 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10619 (parallel [(const_int 1)
10620 (const_int 3)])))
10621 (sign_extend:V2DI
10622 (vec_select:V2SI
10623 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10624 (parallel [(const_int 1)
10625 (const_int 3)]))))
10626 (match_operand:V2DI 3 "register_operand" "x,x")))]
10627 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10628 "@
10629 vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
10630 vpmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10631 [(set_attr "type" "ssemuladd")
10632 (set_attr "mode" "TI")])
10633
;; Same RTL shape as the elements-1-and-3 multiply-add, but with the
;; addend (operand 3) in memory.  The destination is earlyclobber ("=&x").
;; The output template is "#", so the insn is never emitted as is: once
;; reload_completed, it is split into
;;   1. a load of operand 3 into operand 0, and
;;   2. the multiply-add with operand 0 as the addend.
10634 (define_insn_and_split "*xop_pmacsdql_mem"
10635 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x")
10636 (plus:V2DI
10637 (mult:V2DI
10638 (sign_extend:V2DI
10639 (vec_select:V2SI
10640 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10641 (parallel [(const_int 1)
10642 (const_int 3)])))
10643 (sign_extend:V2DI
10644 (vec_select:V2SI
10645 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10646 (parallel [(const_int 1)
10647 (const_int 3)]))))
10648 (match_operand:V2DI 3 "memory_operand" "m,m")))]
10649 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)"
10650 "#"
10651 "&& reload_completed"
10652 [(set (match_dup 0)
10653 (match_dup 3))
10654 (set (match_dup 0)
10655 (plus:V2DI
10656 (mult:V2DI
10657 (sign_extend:V2DI
10658 (vec_select:V2SI
10659 (match_dup 1)
10660 (parallel [(const_int 1)
10661 (const_int 3)])))
10662 (sign_extend:V2DI
10663 (vec_select:V2SI
10664 (match_dup 2)
10665 (parallel [(const_int 1)
10666 (const_int 3)]))))
10667 (match_dup 0)))])
10668
10669 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10670 ;; fake it with a multiply/add. In general, we expect the define_split to
10671 ;; occur before register allocation, so we have to handle the corner case where
10672 ;; the target is the same as operands 1/2
;; V2DI product of the sign-extended elements 1 and 3 of V4SI operands 1
;; and 2, with no addend.  The destination is earlyclobber ("=&x").
;; The output template is "#"; once reload_completed, the insn is split into
;;   1. operand 0 = operands[3], which the preparation code sets to
;;      CONST0_RTX (V2DImode), and
;;   2. a multiply-add of the same elements with operand 0 as the addend.
10673 (define_insn_and_split "xop_mulv2div2di3_low"
10674 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10675 (mult:V2DI
10676 (sign_extend:V2DI
10677 (vec_select:V2SI
10678 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10679 (parallel [(const_int 1)
10680 (const_int 3)])))
10681 (sign_extend:V2DI
10682 (vec_select:V2SI
10683 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10684 (parallel [(const_int 1)
10685 (const_int 3)])))))]
10686 "TARGET_XOP"
10687 "#"
10688 "&& reload_completed"
10689 [(set (match_dup 0)
10690 (match_dup 3))
10691 (set (match_dup 0)
10692 (plus:V2DI
10693 (mult:V2DI
10694 (sign_extend:V2DI
10695 (vec_select:V2SI
10696 (match_dup 1)
10697 (parallel [(const_int 1)
10698 (const_int 3)])))
10699 (sign_extend:V2DI
10700 (vec_select:V2SI
10701 (match_dup 2)
10702 (parallel [(const_int 1)
10703 (const_int 3)]))))
10704 (match_dup 0)))]
10705 {
10706 operands[3] = CONST0_RTX (V2DImode);
10707 }
10708 [(set_attr "type" "ssemuladd")
10709 (set_attr "mode" "TI")])
10710
;; Non-saturating accumulate into V2DI register operand 3 of the V2DI
;; product of the sign-extended elements 0 and 2 of V4SI operands 1 and 2.
;; The two alternatives differ in which multiplicand may be memory; the
;; second alternative's template swaps %1 and %2.
;; Emits vpmacsdqh; gated by TARGET_XOP and ix86_fma4_valid_op_p.
10711 (define_insn "xop_pmacsdqh"
10712 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10713 (plus:V2DI
10714 (mult:V2DI
10715 (sign_extend:V2DI
10716 (vec_select:V2SI
10717 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10718 (parallel [(const_int 0)
10719 (const_int 2)])))
10720 (sign_extend:V2DI
10721 (vec_select:V2SI
10722 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10723 (parallel [(const_int 0)
10724 (const_int 2)]))))
10725 (match_operand:V2DI 3 "register_operand" "x,x")))]
10726 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10727 "@
10728 vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10729 vpmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10730 [(set_attr "type" "ssemuladd")
10731 (set_attr "mode" "TI")])
10732
;; Same RTL shape as the elements-0-and-2 multiply-add, but with the
;; addend (operand 3) in memory.  The destination is earlyclobber ("=&x").
;; The output template is "#", so the insn is never emitted as is: once
;; reload_completed, it is split into
;;   1. a load of operand 3 into operand 0, and
;;   2. the multiply-add with operand 0 as the addend.
10733 (define_insn_and_split "*xop_pmacsdqh_mem"
10734 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x")
10735 (plus:V2DI
10736 (mult:V2DI
10737 (sign_extend:V2DI
10738 (vec_select:V2SI
10739 (match_operand:V4SI 1 "nonimmediate_operand" "%x,m")
10740 (parallel [(const_int 0)
10741 (const_int 2)])))
10742 (sign_extend:V2DI
10743 (vec_select:V2SI
10744 (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")
10745 (parallel [(const_int 0)
10746 (const_int 2)]))))
10747 (match_operand:V2DI 3 "memory_operand" "m,m")))]
10748 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)"
10749 "#"
10750 "&& reload_completed"
10751 [(set (match_dup 0)
10752 (match_dup 3))
10753 (set (match_dup 0)
10754 (plus:V2DI
10755 (mult:V2DI
10756 (sign_extend:V2DI
10757 (vec_select:V2SI
10758 (match_dup 1)
10759 (parallel [(const_int 0)
10760 (const_int 2)])))
10761 (sign_extend:V2DI
10762 (vec_select:V2SI
10763 (match_dup 2)
10764 (parallel [(const_int 0)
10765 (const_int 2)]))))
10766 (match_dup 0)))])
10767
10768 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10769 ;; fake it with a multiply/add. In general, we expect the define_split to
10770 ;; occur before register allocation, so we have to handle the corner case where
10771 ;; the target is the same as either operands[1] or operands[2]
;; V2DI product of the sign-extended elements 0 and 2 of V4SI operands 1
;; and 2, with no addend.  The destination is earlyclobber ("=&x").
;; The output template is "#"; once reload_completed, the insn is split into
;;   1. operand 0 = operands[3], which the preparation code sets to
;;      CONST0_RTX (V2DImode), and
;;   2. a multiply-add of the same elements with operand 0 as the addend.
10772 (define_insn_and_split "xop_mulv2div2di3_high"
10773 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10774 (mult:V2DI
10775 (sign_extend:V2DI
10776 (vec_select:V2SI
10777 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10778 (parallel [(const_int 0)
10779 (const_int 2)])))
10780 (sign_extend:V2DI
10781 (vec_select:V2SI
10782 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10783 (parallel [(const_int 0)
10784 (const_int 2)])))))]
10785 "TARGET_XOP"
10786 "#"
10787 "&& reload_completed"
10788 [(set (match_dup 0)
10789 (match_dup 3))
10790 (set (match_dup 0)
10791 (plus:V2DI
10792 (mult:V2DI
10793 (sign_extend:V2DI
10794 (vec_select:V2SI
10795 (match_dup 1)
10796 (parallel [(const_int 0)
10797 (const_int 2)])))
10798 (sign_extend:V2DI
10799 (vec_select:V2SI
10800 (match_dup 2)
10801 (parallel [(const_int 0)
10802 (const_int 2)]))))
10803 (match_dup 0)))]
10804 {
10805 operands[3] = CONST0_RTX (V2DImode);
10806 }
10807 [(set_attr "type" "ssemuladd")
10808 (set_attr "mode" "TI")])
10809
10810 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; Saturating (ss_plus) accumulate into V4SI register operand 3 of the
;; V4SI product of the sign-extended elements 1, 3, 5 and 7 of V8HI
;; operands 1 and 2.  The two alternatives differ in which multiplicand
;; may be memory; the second alternative's template swaps %1 and %2.
;; Emits vpmacsswd; gated by TARGET_XOP and ix86_fma4_valid_op_p.
10811 (define_insn "xop_pmacsswd"
10812 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10813 (ss_plus:V4SI
10814 (mult:V4SI
10815 (sign_extend:V4SI
10816 (vec_select:V4HI
10817 (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10818 (parallel [(const_int 1)
10819 (const_int 3)
10820 (const_int 5)
10821 (const_int 7)])))
10822 (sign_extend:V4SI
10823 (vec_select:V4HI
10824 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")
10825 (parallel [(const_int 1)
10826 (const_int 3)
10827 (const_int 5)
10828 (const_int 7)]))))
10829 (match_operand:V4SI 3 "register_operand" "x,x")))]
10830 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10831 "@
10832 vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10833 vpmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10834 [(set_attr "type" "ssemuladd")
10835 (set_attr "mode" "TI")])
10836
;; Non-saturating accumulate into V4SI register operand 3 of the V4SI
;; product of the sign-extended elements 1, 3, 5 and 7 of V8HI operands 1
;; and 2.  The two alternatives differ in which multiplicand may be
;; memory; the second alternative's template swaps %1 and %2.
;; Emits vpmacswd; gated by TARGET_XOP and ix86_fma4_valid_op_p.
10837 (define_insn "xop_pmacswd"
10838 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10839 (plus:V4SI
10840 (mult:V4SI
10841 (sign_extend:V4SI
10842 (vec_select:V4HI
10843 (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10844 (parallel [(const_int 1)
10845 (const_int 3)
10846 (const_int 5)
10847 (const_int 7)])))
10848 (sign_extend:V4SI
10849 (vec_select:V4HI
10850 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")
10851 (parallel [(const_int 1)
10852 (const_int 3)
10853 (const_int 5)
10854 (const_int 7)]))))
10855 (match_operand:V4SI 3 "register_operand" "x,x")))]
10856 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10857 "@
10858 vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10859 vpmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10860 [(set_attr "type" "ssemuladd")
10861 (set_attr "mode" "TI")])
10862
;; vpmadcsswd: form two V4SI products of sign-extended halfwords -- one from
;; the even elements (0, 2, 4, 6) and one from the odd elements (1, 3, 5, 7)
;; of operands 1 and 2 -- add the two products with a plain plus, then add
;; accumulator operand 3 with signed saturation (ss_plus).
;; The second alternative swaps %1 and %2 in the output template.
10863 (define_insn "xop_pmadcsswd"
10864 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10865 (ss_plus:V4SI
10866 (plus:V4SI
10867 (mult:V4SI
10868 (sign_extend:V4SI
10869 (vec_select:V4HI
10870 (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10871 (parallel [(const_int 0)
10872 (const_int 2)
10873 (const_int 4)
10874 (const_int 6)])))
10875 (sign_extend:V4SI
10876 (vec_select:V4HI
10877 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")
10878 (parallel [(const_int 0)
10879 (const_int 2)
10880 (const_int 4)
10881 (const_int 6)]))))
10882 (mult:V4SI
10883 (sign_extend:V4SI
10884 (vec_select:V4HI
10885 (match_dup 1)
10886 (parallel [(const_int 1)
10887 (const_int 3)
10888 (const_int 5)
10889 (const_int 7)])))
10890 (sign_extend:V4SI
10891 (vec_select:V4HI
10892 (match_dup 2)
10893 (parallel [(const_int 1)
10894 (const_int 3)
10895 (const_int 5)
10896 (const_int 7)])))))
10897 (match_operand:V4SI 3 "register_operand" "x,x")))]
10898 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10899 "@
10900 vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10901 vpmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10902 [(set_attr "type" "ssemuladd")
10903 (set_attr "mode" "TI")])
10904
;; vpmadcswd: non-saturating counterpart of the pattern above.  The products
;; of the sign-extended even-element and odd-element halfwords of operands 1
;; and 2 are summed, and accumulator operand 3 is added with a plain plus.
;; The second alternative swaps %1 and %2 in the output template.
10905 (define_insn "xop_pmadcswd"
10906 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10907 (plus:V4SI
10908 (plus:V4SI
10909 (mult:V4SI
10910 (sign_extend:V4SI
10911 (vec_select:V4HI
10912 (match_operand:V8HI 1 "nonimmediate_operand" "%x,m")
10913 (parallel [(const_int 0)
10914 (const_int 2)
10915 (const_int 4)
10916 (const_int 6)])))
10917 (sign_extend:V4SI
10918 (vec_select:V4HI
10919 (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")
10920 (parallel [(const_int 0)
10921 (const_int 2)
10922 (const_int 4)
10923 (const_int 6)]))))
10924 (mult:V4SI
10925 (sign_extend:V4SI
10926 (vec_select:V4HI
10927 (match_dup 1)
10928 (parallel [(const_int 1)
10929 (const_int 3)
10930 (const_int 5)
10931 (const_int 7)])))
10932 (sign_extend:V4SI
10933 (vec_select:V4HI
10934 (match_dup 2)
10935 (parallel [(const_int 1)
10936 (const_int 3)
10937 (const_int 5)
10938 (const_int 7)])))))
10939 (match_operand:V4SI 3 "register_operand" "x,x")))]
10940 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)"
10941 "@
10942 vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10943 vpmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10944 [(set_attr "type" "ssemuladd")
10945 (set_attr "mode" "TI")])
10946
10947 ;; XOP parallel XMM conditional moves
;; vpcmov on 16-byte vectors: modelled as if_then_else with operand 3 as the
;; condition, operand 1 as the "then" value and operand 2 as the "else" value.
;; Across the three alternatives at most one of operands 1-3 is a memory
;; operand.  No "mode" attribute is set on this pattern.
10948 (define_insn "xop_pcmov_<mode>"
10949 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x")
10950 (if_then_else:SSEMODE
10951 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,x,m")
10952 (match_operand:SSEMODE 1 "vector_move_operand" "x,m,x")
10953 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x")))]
10954 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
10955 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10956 [(set_attr "type" "sse4arg")])
10957
;; vpcmov on 32-byte vectors (AVX256MODE): same if_then_else shape as the
;; 16-byte pattern, with operand 3 as the condition and operands 1 / 2 as the
;; "then" / "else" values.
10958 (define_insn "xop_pcmov_<mode>256"
10959 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
10960 (if_then_else:AVX256MODE
10961 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,x,m")
10962 (match_operand:AVX256MODE 1 "vector_move_operand" "x,m,x")
10963 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x,x")))]
10964 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
10965 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10966 [(set_attr "type" "sse4arg")])
10967
10968 ;; XOP horizontal add/subtract instructions
;; vphaddbw: each V8HI result element is the sum of a sign-extended
;; even-indexed byte and the following odd-indexed byte of operand 1
;; (bytes 2i and 2i+1 feed result element i).
10969 (define_insn "xop_phaddbw"
10970 [(set (match_operand:V8HI 0 "register_operand" "=x")
10971 (plus:V8HI
10972 (sign_extend:V8HI
10973 (vec_select:V8QI
10974 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10975 (parallel [(const_int 0)
10976 (const_int 2)
10977 (const_int 4)
10978 (const_int 6)
10979 (const_int 8)
10980 (const_int 10)
10981 (const_int 12)
10982 (const_int 14)])))
10983 (sign_extend:V8HI
10984 (vec_select:V8QI
10985 (match_dup 1)
10986 (parallel [(const_int 1)
10987 (const_int 3)
10988 (const_int 5)
10989 (const_int 7)
10990 (const_int 9)
10991 (const_int 11)
10992 (const_int 13)
10993 (const_int 15)])))))]
10994 "TARGET_XOP"
10995 "vphaddbw\t{%1, %0|%0, %1}"
10996 [(set_attr "type" "sseiadd1")])
10997
;; vphaddbd: each V4SI result element i is the sum of the four sign-extended
;; bytes 4i .. 4i+3 of operand 1.  The four vec_selects pick byte k of every
;; 4-byte group (k = 0..3) and the plus tree adds them.
10998 (define_insn "xop_phaddbd"
10999 [(set (match_operand:V4SI 0 "register_operand" "=x")
11000 (plus:V4SI
11001 (plus:V4SI
11002 (sign_extend:V4SI
11003 (vec_select:V4QI
11004 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11005 (parallel [(const_int 0)
11006 (const_int 4)
11007 (const_int 8)
11008 (const_int 12)])))
11009 (sign_extend:V4SI
11010 (vec_select:V4QI
11011 (match_dup 1)
11012 (parallel [(const_int 1)
11013 (const_int 5)
11014 (const_int 9)
11015 (const_int 13)]))))
11016 (plus:V4SI
11017 (sign_extend:V4SI
11018 (vec_select:V4QI
11019 (match_dup 1)
11020 (parallel [(const_int 2)
11021 (const_int 6)
11022 (const_int 10)
11023 (const_int 14)])))
11024 (sign_extend:V4SI
11025 (vec_select:V4QI
11026 (match_dup 1)
11027 (parallel [(const_int 3)
11028 (const_int 7)
11029 (const_int 11)
11030 (const_int 15)]))))))]
11031 "TARGET_XOP"
11032 "vphaddbd\t{%1, %0|%0, %1}"
11033 [(set_attr "type" "sseiadd1")])
11034
;; vphaddbq: each V2DI result element is the sum of eight consecutive
;; sign-extended bytes of operand 1 (bytes 0..7 feed element 0, bytes 8..15
;; feed element 1).  Every vec_select therefore picks byte k of the low
;; quadword and byte k+8 of the high quadword (k = 0..7), so that lane 0 of
;; each V2QI comes from the low half and lane 1 from the high half.  The
;; earlier selectors (0,4), (1,5), ... mixed bytes 0-3 and 8-11 into element 0.
11035 (define_insn "xop_phaddbq"
11036 [(set (match_operand:V2DI 0 "register_operand" "=x")
11037 (plus:V2DI
11038 (plus:V2DI
11039 (plus:V2DI
11040 (sign_extend:V2DI
11041 (vec_select:V2QI
11042 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11043 (parallel [(const_int 0)
11044 (const_int 8)])))
11045 (sign_extend:V2DI
11046 (vec_select:V2QI
11047 (match_dup 1)
11048 (parallel [(const_int 1)
11049 (const_int 9)]))))
11050 (plus:V2DI
11051 (sign_extend:V2DI
11052 (vec_select:V2QI
11053 (match_dup 1)
11054 (parallel [(const_int 2)
11055 (const_int 10)])))
11056 (sign_extend:V2DI
11057 (vec_select:V2QI
11058 (match_dup 1)
11059 (parallel [(const_int 3)
11060 (const_int 11)])))))
11061 (plus:V2DI
11062 (plus:V2DI
11063 (sign_extend:V2DI
11064 (vec_select:V2QI
11065 (match_dup 1)
11066 (parallel [(const_int 4)
11067 (const_int 12)])))
11068 (sign_extend:V2DI
11069 (vec_select:V2QI
11070 (match_dup 1)
11071 (parallel [(const_int 5)
11072 (const_int 13)]))))
11073 (plus:V2DI
11074 (sign_extend:V2DI
11075 (vec_select:V2QI
11076 (match_dup 1)
11077 (parallel [(const_int 6)
11078 (const_int 14)])))
11079 (sign_extend:V2DI
11080 (vec_select:V2QI
11081 (match_dup 1)
11082 (parallel [(const_int 7)
11083 (const_int 15)])))))))]
11084 "TARGET_XOP"
11085 "vphaddbq\t{%1, %0|%0, %1}"
11086 [(set_attr "type" "sseiadd1")])
11087
;; vphaddwd: each V4SI result element i is the sum of sign-extended halfwords
;; 2i and 2i+1 of operand 1.
11088 (define_insn "xop_phaddwd"
11089 [(set (match_operand:V4SI 0 "register_operand" "=x")
11090 (plus:V4SI
11091 (sign_extend:V4SI
11092 (vec_select:V4HI
11093 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11094 (parallel [(const_int 0)
11095 (const_int 2)
11096 (const_int 4)
11097 (const_int 6)])))
11098 (sign_extend:V4SI
11099 (vec_select:V4HI
11100 (match_dup 1)
11101 (parallel [(const_int 1)
11102 (const_int 3)
11103 (const_int 5)
11104 (const_int 7)])))))]
11105 "TARGET_XOP"
11106 "vphaddwd\t{%1, %0|%0, %1}"
11107 [(set_attr "type" "sseiadd1")])
11108
;; vphaddwq: each V2DI result element i is the sum of the four sign-extended
;; halfwords 4i .. 4i+3 of operand 1; each vec_select picks halfword k of the
;; low group and halfword k+4 of the high group (k = 0..3).
11109 (define_insn "xop_phaddwq"
11110 [(set (match_operand:V2DI 0 "register_operand" "=x")
11111 (plus:V2DI
11112 (plus:V2DI
11113 (sign_extend:V2DI
11114 (vec_select:V2HI
11115 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11116 (parallel [(const_int 0)
11117 (const_int 4)])))
11118 (sign_extend:V2DI
11119 (vec_select:V2HI
11120 (match_dup 1)
11121 (parallel [(const_int 1)
11122 (const_int 5)]))))
11123 (plus:V2DI
11124 (sign_extend:V2DI
11125 (vec_select:V2HI
11126 (match_dup 1)
11127 (parallel [(const_int 2)
11128 (const_int 6)])))
11129 (sign_extend:V2DI
11130 (vec_select:V2HI
11131 (match_dup 1)
11132 (parallel [(const_int 3)
11133 (const_int 7)]))))))]
11134 "TARGET_XOP"
11135 "vphaddwq\t{%1, %0|%0, %1}"
11136 [(set_attr "type" "sseiadd1")])
11137
;; vphadddq: each V2DI result element i is the sum of sign-extended
;; doublewords 2i and 2i+1 of operand 1.
11138 (define_insn "xop_phadddq"
11139 [(set (match_operand:V2DI 0 "register_operand" "=x")
11140 (plus:V2DI
11141 (sign_extend:V2DI
11142 (vec_select:V2SI
11143 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11144 (parallel [(const_int 0)
11145 (const_int 2)])))
11146 (sign_extend:V2DI
11147 (vec_select:V2SI
11148 (match_dup 1)
11149 (parallel [(const_int 1)
11150 (const_int 3)])))))]
11151 "TARGET_XOP"
11152 "vphadddq\t{%1, %0|%0, %1}"
11153 [(set_attr "type" "sseiadd1")])
11154
;; vphaddubw: unsigned form of the byte-to-word horizontal add.  Each V8HI
;; result element i is the sum of zero-extended bytes 2i and 2i+1 of
;; operand 1.
11155 (define_insn "xop_phaddubw"
11156 [(set (match_operand:V8HI 0 "register_operand" "=x")
11157 (plus:V8HI
11158 (zero_extend:V8HI
11159 (vec_select:V8QI
11160 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11161 (parallel [(const_int 0)
11162 (const_int 2)
11163 (const_int 4)
11164 (const_int 6)
11165 (const_int 8)
11166 (const_int 10)
11167 (const_int 12)
11168 (const_int 14)])))
11169 (zero_extend:V8HI
11170 (vec_select:V8QI
11171 (match_dup 1)
11172 (parallel [(const_int 1)
11173 (const_int 3)
11174 (const_int 5)
11175 (const_int 7)
11176 (const_int 9)
11177 (const_int 11)
11178 (const_int 13)
11179 (const_int 15)])))))]
11180 "TARGET_XOP"
11181 "vphaddubw\t{%1, %0|%0, %1}"
11182 [(set_attr "type" "sseiadd1")])
11183
;; vphaddubd: unsigned form of the byte-to-doubleword horizontal add.  Each
;; V4SI result element i is the sum of the four zero-extended bytes
;; 4i .. 4i+3 of operand 1.
11184 (define_insn "xop_phaddubd"
11185 [(set (match_operand:V4SI 0 "register_operand" "=x")
11186 (plus:V4SI
11187 (plus:V4SI
11188 (zero_extend:V4SI
11189 (vec_select:V4QI
11190 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11191 (parallel [(const_int 0)
11192 (const_int 4)
11193 (const_int 8)
11194 (const_int 12)])))
11195 (zero_extend:V4SI
11196 (vec_select:V4QI
11197 (match_dup 1)
11198 (parallel [(const_int 1)
11199 (const_int 5)
11200 (const_int 9)
11201 (const_int 13)]))))
11202 (plus:V4SI
11203 (zero_extend:V4SI
11204 (vec_select:V4QI
11205 (match_dup 1)
11206 (parallel [(const_int 2)
11207 (const_int 6)
11208 (const_int 10)
11209 (const_int 14)])))
11210 (zero_extend:V4SI
11211 (vec_select:V4QI
11212 (match_dup 1)
11213 (parallel [(const_int 3)
11214 (const_int 7)
11215 (const_int 11)
11216 (const_int 15)]))))))]
11217 "TARGET_XOP"
11218 "vphaddubd\t{%1, %0|%0, %1}"
11219 [(set_attr "type" "sseiadd1")])
11220
;; vphaddubq: unsigned form of the byte-to-quadword horizontal add.  Each
;; V2DI result element is the sum of eight consecutive zero-extended bytes of
;; operand 1 (bytes 0..7 feed element 0, bytes 8..15 feed element 1).
;; Two fixes relative to the earlier template:
;;  - every term is zero_extend; two terms used sign_extend, which does not
;;    describe an unsigned add;
;;  - every vec_select picks byte k of the low quadword and byte k+8 of the
;;    high quadword (k = 0..7); the earlier selectors (0,4), (1,5), ... mixed
;;    bytes from both quadwords into the same result element.
11221 (define_insn "xop_phaddubq"
11222 [(set (match_operand:V2DI 0 "register_operand" "=x")
11223 (plus:V2DI
11224 (plus:V2DI
11225 (plus:V2DI
11226 (zero_extend:V2DI
11227 (vec_select:V2QI
11228 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11229 (parallel [(const_int 0)
11230 (const_int 8)])))
11231 (zero_extend:V2DI
11232 (vec_select:V2QI
11233 (match_dup 1)
11234 (parallel [(const_int 1)
11235 (const_int 9)]))))
11236 (plus:V2DI
11237 (zero_extend:V2DI
11238 (vec_select:V2QI
11239 (match_dup 1)
11240 (parallel [(const_int 2)
11241 (const_int 10)])))
11242 (zero_extend:V2DI
11243 (vec_select:V2QI
11244 (match_dup 1)
11245 (parallel [(const_int 3)
11246 (const_int 11)])))))
11247 (plus:V2DI
11248 (plus:V2DI
11249 (zero_extend:V2DI
11250 (vec_select:V2QI
11251 (match_dup 1)
11252 (parallel [(const_int 4)
11253 (const_int 12)])))
11254 (zero_extend:V2DI
11255 (vec_select:V2QI
11256 (match_dup 1)
11257 (parallel [(const_int 5)
11258 (const_int 13)]))))
11259 (plus:V2DI
11260 (zero_extend:V2DI
11261 (vec_select:V2QI
11262 (match_dup 1)
11263 (parallel [(const_int 6)
11264 (const_int 14)])))
11265 (zero_extend:V2DI
11266 (vec_select:V2QI
11267 (match_dup 1)
11268 (parallel [(const_int 7)
11269 (const_int 15)])))))))]
11270 "TARGET_XOP"
11271 "vphaddubq\t{%1, %0|%0, %1}"
11272 [(set_attr "type" "sseiadd1")])
11273
;; vphadduwd: each V4SI result element i is the sum of zero-extended
;; halfwords 2i and 2i+1 of operand 1.
11274 (define_insn "xop_phadduwd"
11275 [(set (match_operand:V4SI 0 "register_operand" "=x")
11276 (plus:V4SI
11277 (zero_extend:V4SI
11278 (vec_select:V4HI
11279 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11280 (parallel [(const_int 0)
11281 (const_int 2)
11282 (const_int 4)
11283 (const_int 6)])))
11284 (zero_extend:V4SI
11285 (vec_select:V4HI
11286 (match_dup 1)
11287 (parallel [(const_int 1)
11288 (const_int 3)
11289 (const_int 5)
11290 (const_int 7)])))))]
11291 "TARGET_XOP"
11292 "vphadduwd\t{%1, %0|%0, %1}"
11293 [(set_attr "type" "sseiadd1")])
11294
;; vphadduwq: each V2DI result element i is the sum of the four zero-extended
;; halfwords 4i .. 4i+3 of operand 1.
11295 (define_insn "xop_phadduwq"
11296 [(set (match_operand:V2DI 0 "register_operand" "=x")
11297 (plus:V2DI
11298 (plus:V2DI
11299 (zero_extend:V2DI
11300 (vec_select:V2HI
11301 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11302 (parallel [(const_int 0)
11303 (const_int 4)])))
11304 (zero_extend:V2DI
11305 (vec_select:V2HI
11306 (match_dup 1)
11307 (parallel [(const_int 1)
11308 (const_int 5)]))))
11309 (plus:V2DI
11310 (zero_extend:V2DI
11311 (vec_select:V2HI
11312 (match_dup 1)
11313 (parallel [(const_int 2)
11314 (const_int 6)])))
11315 (zero_extend:V2DI
11316 (vec_select:V2HI
11317 (match_dup 1)
11318 (parallel [(const_int 3)
11319 (const_int 7)]))))))]
11320 "TARGET_XOP"
11321 "vphadduwq\t{%1, %0|%0, %1}"
11322 [(set_attr "type" "sseiadd1")])
11323
;; vphaddudq: each V2DI result element i is the sum of zero-extended
;; doublewords 2i and 2i+1 of operand 1.
11324 (define_insn "xop_phaddudq"
11325 [(set (match_operand:V2DI 0 "register_operand" "=x")
11326 (plus:V2DI
11327 (zero_extend:V2DI
11328 (vec_select:V2SI
11329 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11330 (parallel [(const_int 0)
11331 (const_int 2)])))
11332 (zero_extend:V2DI
11333 (vec_select:V2SI
11334 (match_dup 1)
11335 (parallel [(const_int 1)
11336 (const_int 3)])))))]
11337 "TARGET_XOP"
11338 "vphaddudq\t{%1, %0|%0, %1}"
11339 [(set_attr "type" "sseiadd1")])
11340
;; vphsubbw: each V8HI result element i is sign-extended byte 2i minus
;; sign-extended byte 2i+1 of operand 1 (even element is the minuend).
11341 (define_insn "xop_phsubbw"
11342 [(set (match_operand:V8HI 0 "register_operand" "=x")
11343 (minus:V8HI
11344 (sign_extend:V8HI
11345 (vec_select:V8QI
11346 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11347 (parallel [(const_int 0)
11348 (const_int 2)
11349 (const_int 4)
11350 (const_int 6)
11351 (const_int 8)
11352 (const_int 10)
11353 (const_int 12)
11354 (const_int 14)])))
11355 (sign_extend:V8HI
11356 (vec_select:V8QI
11357 (match_dup 1)
11358 (parallel [(const_int 1)
11359 (const_int 3)
11360 (const_int 5)
11361 (const_int 7)
11362 (const_int 9)
11363 (const_int 11)
11364 (const_int 13)
11365 (const_int 15)])))))]
11366 "TARGET_XOP"
11367 "vphsubbw\t{%1, %0|%0, %1}"
11368 [(set_attr "type" "sseiadd1")])
11369
;; vphsubwd: each V4SI result element i is sign-extended halfword 2i minus
;; sign-extended halfword 2i+1 of operand 1.
11370 (define_insn "xop_phsubwd"
11371 [(set (match_operand:V4SI 0 "register_operand" "=x")
11372 (minus:V4SI
11373 (sign_extend:V4SI
11374 (vec_select:V4HI
11375 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11376 (parallel [(const_int 0)
11377 (const_int 2)
11378 (const_int 4)
11379 (const_int 6)])))
11380 (sign_extend:V4SI
11381 (vec_select:V4HI
11382 (match_dup 1)
11383 (parallel [(const_int 1)
11384 (const_int 3)
11385 (const_int 5)
11386 (const_int 7)])))))]
11387 "TARGET_XOP"
11388 "vphsubwd\t{%1, %0|%0, %1}"
11389 [(set_attr "type" "sseiadd1")])
11390
;; vphsubdq: each V2DI result element i is sign-extended doubleword 2i minus
;; sign-extended doubleword 2i+1 of operand 1.
11391 (define_insn "xop_phsubdq"
11392 [(set (match_operand:V2DI 0 "register_operand" "=x")
11393 (minus:V2DI
11394 (sign_extend:V2DI
11395 (vec_select:V2SI
11396 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11397 (parallel [(const_int 0)
11398 (const_int 2)])))
11399 (sign_extend:V2DI
11400 (vec_select:V2SI
11401 (match_dup 1)
11402 (parallel [(const_int 1)
11403 (const_int 3)])))))]
11404 "TARGET_XOP"
11405 "vphsubdq\t{%1, %0|%0, %1}"
11406 [(set_attr "type" "sseiadd1")])
11407
11408 ;; XOP permute instructions
;; vpperm: kept opaque to the optimizers as UNSPEC_XOP_PERMUTE over three
;; V16QI inputs.  Across the three alternatives at most one input is a
;; memory operand.
;; NOTE(review): operand 3 is presumably the byte selector -- confirm against
;; the XOP instruction reference.
11409 (define_insn "xop_pperm"
11410 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x")
11411 (unspec:V16QI
11412 [(match_operand:V16QI 1 "nonimmediate_operand" "x,x,m")
11413 (match_operand:V16QI 2 "nonimmediate_operand" "x,m,x")
11414 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x")]
11415 UNSPEC_XOP_PERMUTE))]
11416 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
11417 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11418 [(set_attr "type" "sse4arg")
11419 (set_attr "mode" "TI")])
11420
11421 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI vectors into one V4SI: the result is the concatenation of
;; operand 1 and operand 2, each truncated to V2SI.  Emitted as vpperm with
;; operand 3, which the RTL only records as a (use ...).
;; NOTE(review): operand 3 presumably holds the matching permute selector
;; built by the caller -- confirm.
11422 (define_insn "xop_pperm_pack_v2di_v4si"
11423 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
11424 (vec_concat:V4SI
11425 (truncate:V2SI
11426 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,m"))
11427 (truncate:V2SI
11428 (match_operand:V2DI 2 "nonimmediate_operand" "x,m,x"))))
11429 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
11430 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
11431 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11432 [(set_attr "type" "sse4arg")
11433 (set_attr "mode" "TI")])
11434
;; Pack two V4SI vectors into one V8HI: the result is the concatenation of
;; operand 1 and operand 2, each truncated to V4HI.  Emitted as vpperm with
;; operand 3, which the RTL only records as a (use ...).
11435 (define_insn "xop_pperm_pack_v4si_v8hi"
11436 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
11437 (vec_concat:V8HI
11438 (truncate:V4HI
11439 (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m"))
11440 (truncate:V4HI
11441 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))))
11442 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
11443 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
11444 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11445 [(set_attr "type" "sse4arg")
11446 (set_attr "mode" "TI")])
11447
;; Pack two V8HI vectors into one V16QI: the result is the concatenation of
;; operand 1 and operand 2, each truncated to V8QI.  Emitted as vpperm with
;; operand 3, which the RTL only records as a (use ...).
11448 (define_insn "xop_pperm_pack_v8hi_v16qi"
11449 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x")
11450 (vec_concat:V16QI
11451 (truncate:V8QI
11452 (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m"))
11453 (truncate:V8QI
11454 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))))
11455 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))]
11456 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
11457 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11458 [(set_attr "type" "sse4arg")
11459 (set_attr "mode" "TI")])
11460
11461 ;; XOP packed rotate instructions
;; Rotate-left expander.  A count accepted by the
;; const_0_to_<sserotatemax>_operand predicate falls through to the
;; (rotate ...) template above the C body.  Any other count is converted to
;; the vector's scalar mode if needed, replicated into every element of a
;; fresh vector register, and handed to the per-element xop_vrotl<mode>3.
11462 (define_expand "rotl<mode>3"
11463 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11464 (rotate:SSEMODE1248
11465 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11466 (match_operand:SI 2 "general_operand")))]
11467 "TARGET_XOP"
11468 {
11469 /* A count that is not an in-range immediate is broadcast to a vector.  */
11470 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11471 {
11472 rtvec elts = rtvec_alloc (<ssescalarnum>);
11473 rtx init = gen_rtx_PARALLEL (<MODE>mode, elts);
11474 rtx counts = gen_reg_rtx (<MODE>mode);
11475 rtx count = operands[2];
11476 int n = <ssescalarnum>;
11477
11478 if (GET_MODE (count) != <ssescalarmode>mode)
11479 {
11480 count = gen_reg_rtx (<ssescalarmode>mode);
11481 convert_move (count, operands[2], false);
11482 }
11483
11484 while (n-- > 0)
11485 RTVEC_ELT (elts, n) = count;
11486
11487 emit_insn (gen_vec_init<mode> (counts, init));
11488 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], counts));
11489 DONE;
11490 }
11491 })
11492
;; Rotate-right expander.  A count accepted by the
;; const_0_to_<sserotatemax>_operand predicate falls through to the
;; (rotatert ...) template above the C body.  Any other count is converted to
;; the vector's scalar mode if needed, replicated into a vector, negated, and
;; handed to xop_vrotl<mode>3 -- whose template rotates right by the negated
;; count when an element of the count vector is negative.
11493 (define_expand "rotr<mode>3"
11494 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
11495 (rotatert:SSEMODE1248
11496 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
11497 (match_operand:SI 2 "general_operand")))]
11498 "TARGET_XOP"
11499 {
11500 /* A count that is not an in-range immediate is broadcast and negated.  */
11501 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11502 {
11503 rtvec elts = rtvec_alloc (<ssescalarnum>);
11504 rtx init = gen_rtx_PARALLEL (<MODE>mode, elts);
11505 rtx neg_counts = gen_reg_rtx (<MODE>mode);
11506 rtx counts = gen_reg_rtx (<MODE>mode);
11507 rtx count = operands[2];
11508 int n = <ssescalarnum>;
11509
11510 if (GET_MODE (count) != <ssescalarmode>mode)
11511 {
11512 count = gen_reg_rtx (<ssescalarmode>mode);
11513 convert_move (count, operands[2], false);
11514 }
11515
11516 while (n-- > 0)
11517 RTVEC_ELT (elts, n) = count;
11518
11519 emit_insn (gen_vec_init<mode> (counts, init));
11520 emit_insn (gen_neg<mode>2 (neg_counts, counts));
11521 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg_counts));
11522 DONE;
11523 }
11524 })
11525
;; vprot{b,w,d,q} with an immediate count: rotate every element of operand 1
;; left by constant operand 2, which must satisfy
;; const_0_to_<sserotatemax>_operand.
11526 (define_insn "xop_rotl<mode>3"
11527 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11528 (rotate:SSEMODE1248
11529 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11530 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11531 "TARGET_XOP"
11532 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11533 [(set_attr "type" "sseishft")
11534 (set_attr "length_immediate" "1")
11535 (set_attr "mode" "TI")])
11536
;; Rotate every element of operand 1 right by constant operand 2.  The
;; template only emits the rotate-left immediate form of vprot, so the count
;; is rewritten into operands[3] as (element width in bits) - operand 2.
;; The element width comes from the scalar mode of the vector.  The earlier
;; expression <ssescalarnum> * 8 used the element *count* (the same value the
;; rotate expanders pass to rtvec_alloc), which matches the element width
;; only for V4SI and produced a wrong count for V16QI, V8HI and V2DI.
11537 (define_insn "xop_rotr<mode>3"
11538 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11539 (rotatert:SSEMODE1248
11540 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11541 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11542 "TARGET_XOP"
11543 {
11544 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11545 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11546 }
11547 [(set_attr "type" "sseishft")
11548 (set_attr "length_immediate" "1")
11549 (set_attr "mode" "TI")])
11550
;; Per-element variable rotate right: negate the count vector (operand 2)
;; into a fresh register and emit xop_vrotl<mode>3, whose template rotates
;; right by the negated count for negative count elements.
11551 (define_expand "vrotr<mode>3"
11552 [(match_operand:SSEMODE1248 0 "register_operand" "")
11553 (match_operand:SSEMODE1248 1 "register_operand" "")
11554 (match_operand:SSEMODE1248 2 "register_operand" "")]
11555 "TARGET_XOP"
11556 {
11557 rtx neg_counts = gen_reg_rtx (<MODE>mode);
11558 emit_insn (gen_neg<mode>2 (neg_counts, operands[2]));
11559 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg_counts));
11560 DONE;
11561 })
11562
;; Per-element variable rotate left: forwards all three operands unchanged
;; to xop_vrotl<mode>3.
11563 (define_expand "vrotl<mode>3"
11564 [(match_operand:SSEMODE1248 0 "register_operand" "")
11565 (match_operand:SSEMODE1248 1 "register_operand" "")
11566 (match_operand:SSEMODE1248 2 "register_operand" "")]
11567 "TARGET_XOP"
11568 {
11569 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
11570 DONE;
11571 })
11572
;; vprot{b,w,d,q} with a vector count.  Modelled as a select on the sign of
;; count operand 2: when it is >= 0 the result is operand 1 rotated left by
;; operand 2, otherwise operand 1 rotated right by the negated count.
;; Either operand 1 or operand 2 (not both) may be a memory operand.
11573 (define_insn "xop_vrotl<mode>3"
11574 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11575 (if_then_else:SSEMODE1248
11576 (ge:SSEMODE1248
11577 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11578 (const_int 0))
11579 (rotate:SSEMODE1248
11580 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11581 (match_dup 2))
11582 (rotatert:SSEMODE1248
11583 (match_dup 1)
11584 (neg:SSEMODE1248 (match_dup 2)))))]
11585 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
11586 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11587 [(set_attr "type" "sseishft")
11588 (set_attr "prefix_data16" "0")
11589 (set_attr "prefix_extra" "2")
11590 (set_attr "mode" "TI")])
11591
11592 ;; XOP packed shift instructions.
11593 ;; FIXME: add V2DI back in
;; Per-element variable logical shift right: negate the count vector
;; (operand 2) and emit xop_lshl<mode>3, whose template performs lshiftrt by
;; the negated count for negative count elements.
11594 (define_expand "vlshr<mode>3"
11595 [(match_operand:SSEMODE124 0 "register_operand" "")
11596 (match_operand:SSEMODE124 1 "register_operand" "")
11597 (match_operand:SSEMODE124 2 "register_operand" "")]
11598 "TARGET_XOP"
11599 {
11600 rtx neg_counts = gen_reg_rtx (<MODE>mode);
11601 emit_insn (gen_neg<mode>2 (neg_counts, operands[2]));
11602 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg_counts));
11603 DONE;
11604 })
11605
;; Per-element variable arithmetic shift right: negate the count vector
;; (operand 2) and emit xop_ashl<mode>3, whose template performs ashiftrt by
;; the negated count for negative count elements.
11606 (define_expand "vashr<mode>3"
11607 [(match_operand:SSEMODE124 0 "register_operand" "")
11608 (match_operand:SSEMODE124 1 "register_operand" "")
11609 (match_operand:SSEMODE124 2 "register_operand" "")]
11610 "TARGET_XOP"
11611 {
11612 rtx neg_counts = gen_reg_rtx (<MODE>mode);
11613 emit_insn (gen_neg<mode>2 (neg_counts, operands[2]));
11614 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg_counts));
11615 DONE;
11616 })
11617
;; Per-element variable shift left: forwards all three operands unchanged to
;; xop_ashl<mode>3.
11618 (define_expand "vashl<mode>3"
11619 [(match_operand:SSEMODE124 0 "register_operand" "")
11620 (match_operand:SSEMODE124 1 "register_operand" "")
11621 (match_operand:SSEMODE124 2 "register_operand" "")]
11622 "TARGET_XOP"
11623 {
11624 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
11625 DONE;
11626 })
11627
;; vpsha{b,w,d,q}: arithmetic shift with a vector count.  Modelled as a
;; select on the sign of count operand 2: when it is >= 0 the result is
;; operand 1 shifted left by operand 2, otherwise operand 1 arithmetically
;; shifted right by the negated count.
11628 (define_insn "xop_ashl<mode>3"
11629 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11630 (if_then_else:SSEMODE1248
11631 (ge:SSEMODE1248
11632 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11633 (const_int 0))
11634 (ashift:SSEMODE1248
11635 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11636 (match_dup 2))
11637 (ashiftrt:SSEMODE1248
11638 (match_dup 1)
11639 (neg:SSEMODE1248 (match_dup 2)))))]
11640 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
11641 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11642 [(set_attr "type" "sseishft")
11643 (set_attr "prefix_data16" "0")
11644 (set_attr "prefix_extra" "2")
11645 (set_attr "mode" "TI")])
11646
;; vpshl{b,w,d,q}: logical shift with a vector count.  Modelled as a select
;; on the sign of count operand 2: when it is >= 0 the result is operand 1
;; shifted left by operand 2, otherwise operand 1 logically shifted right by
;; the negated count.
11647 (define_insn "xop_lshl<mode>3"
11648 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11649 (if_then_else:SSEMODE1248
11650 (ge:SSEMODE1248
11651 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11652 (const_int 0))
11653 (ashift:SSEMODE1248
11654 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11655 (match_dup 2))
11656 (lshiftrt:SSEMODE1248
11657 (match_dup 1)
11658 (neg:SSEMODE1248 (match_dup 2)))))]
11659 "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)"
11660 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11661 [(set_attr "type" "sseishft")
11662 (set_attr "prefix_data16" "0")
11663 (set_attr "prefix_extra" "2")
11664 (set_attr "mode" "TI")])
11665
11666 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift left by a scalar count: replicate operand 2 into all 16
;; elements of a fresh V16QI register via vec_init, then emit
;; xop_ashlv16qi3 with that count vector.
;; NOTE(review): operand 2 is declared SImode but is used directly as a
;; V16QI element with no mode conversion -- confirm vec_init copes with it.
11667 (define_expand "ashlv16qi3"
11668 [(match_operand:V16QI 0 "register_operand" "")
11669 (match_operand:V16QI 1 "register_operand" "")
11670 (match_operand:SI 2 "nonmemory_operand" "")]
11671 "TARGET_XOP"
11672 {
11673 rtvec elts = rtvec_alloc (16);
11674 rtx init = gen_rtx_PARALLEL (V16QImode, elts);
11675 rtx counts = gen_reg_rtx (V16QImode);
11676 int n = 16;
11677 while (n-- > 0)
11678 RTVEC_ELT (elts, n) = operands[2];
11679
11680 emit_insn (gen_vec_initv16qi (counts, init));
11681 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], counts));
11682 DONE;
11683 })
11684
;; V16QI logical shift by a scalar count: replicate operand 2 into all 16
;; elements of a fresh V16QI register via vec_init, then emit
;; xop_lshlv16qi3 with that (un-negated) count vector.
;; NOTE(review): operand 2 is declared SImode but is used directly as a
;; V16QI element with no mode conversion -- confirm vec_init copes with it.
11685 (define_expand "lshlv16qi3"
11686 [(match_operand:V16QI 0 "register_operand" "")
11687 (match_operand:V16QI 1 "register_operand" "")
11688 (match_operand:SI 2 "nonmemory_operand" "")]
11689 "TARGET_XOP"
11690 {
11691 rtvec elts = rtvec_alloc (16);
11692 rtx init = gen_rtx_PARALLEL (V16QImode, elts);
11693 rtx counts = gen_reg_rtx (V16QImode);
11694 int n = 16;
11695 while (n-- > 0)
11696 RTVEC_ELT (elts, n) = operands[2];
11697
11698 emit_insn (gen_vec_initv16qi (counts, init));
11699 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], counts));
11700 DONE;
11701 })
11702
;; V16QI arithmetic shift right by a scalar count, implemented with
;; xop_ashlv16qi3 and a negated count.  A constant count is negated at
;; expand time before being replicated; a non-constant count is replicated
;; first and the resulting vector is negated with negv16qi2.
11703 (define_expand "ashrv16qi3"
11704 [(match_operand:V16QI 0 "register_operand" "")
11705 (match_operand:V16QI 1 "register_operand" "")
11706 (match_operand:SI 2 "nonmemory_operand" "")]
11707 "TARGET_XOP"
11708 {
11709 rtvec elts = rtvec_alloc (16);
11710 rtx init = gen_rtx_PARALLEL (V16QImode, elts);
11711 rtx counts = gen_reg_rtx (V16QImode);
11712 int n = 16;
11713 rtx elt = operands[2];
11714
11715 if (CONST_INT_P (elt))
11716 elt = GEN_INT (- INTVAL (elt));
11717 while (n-- > 0)
11718 RTVEC_ELT (elts, n) = elt;
11719
11720 emit_insn (gen_vec_initv16qi (counts, init));
11721
11722 if (CONST_INT_P (operands[2]))
11723 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], counts));
11724 else
11725 {
11726 rtx neg = gen_reg_rtx (V16QImode);
11727 emit_insn (gen_negv16qi2 (neg, counts));
11728 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11729 }
11730
11731 DONE;
11732 })
11733
;; V2DI arithmetic shift right by a scalar count, implemented with
;; xop_ashlv2di3 and a negated count replicated into both elements.
;; The negated scalar is produced in one of three ways:
;;  - constant count: negated at expand time with GEN_INT;
;;  - count not in DImode: converted into a DImode register, then negated
;;    with negdi2;
;;  - DImode count: negated directly with negdi2.
11734 (define_expand "ashrv2di3"
11735 [(match_operand:V2DI 0 "register_operand" "")
11736 (match_operand:V2DI 1 "register_operand" "")
11737 (match_operand:DI 2 "nonmemory_operand" "")]
11738 "TARGET_XOP"
11739 {
11740 rtvec vs = rtvec_alloc (2);
11741 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11742 rtx reg = gen_reg_rtx (V2DImode);
11743 rtx ele;
11744
11745 if (CONST_INT_P (operands[2]))
11746 ele = GEN_INT (- INTVAL (operands[2]));
11747 else if (GET_MODE (operands[2]) != DImode)
11748 {
11749 rtx move = gen_reg_rtx (DImode);
11750 ele = gen_reg_rtx (DImode);
11751 convert_move (move, operands[2], false);
11752 emit_insn (gen_negdi2 (ele, move));
11753 }
11754 else
11755 {
11756 ele = gen_reg_rtx (DImode);
11757 emit_insn (gen_negdi2 (ele, operands[2]));
11758 }
11759
11760 RTVEC_ELT (vs, 0) = ele;
11761 RTVEC_ELT (vs, 1) = ele;
11762 emit_insn (gen_vec_initv2di (reg, par));
11763 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11764 DONE;
11765 })
11766
11767 ;; XOP FRCZ support
11768 ;; parallel insns
;; Packed vfrcz on SSEMODEF2P vectors, kept opaque as UNSPEC_FRCZ of
;; operand 1 (register or memory).
11769 (define_insn "xop_frcz<mode>2"
11770 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11771 (unspec:SSEMODEF2P
11772 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11773 UNSPEC_FRCZ))]
11774 "TARGET_XOP"
11775 "vfrcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
11776 [(set_attr "type" "ssecvt1")
11777 (set_attr "mode" "<MODE>")])
11778
11779 ;; scalar insns
;; Scalar vfrcz: vec_merge with mask 1 takes element 0 from UNSPEC_FRCZ of
;; operand 2 and the remaining elements from operand 1, which is tied to the
;; destination by the "0" constraint.  Only %2 and %0 appear in the template.
11780 (define_insn "xop_vmfrcz<mode>2"
11781 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11782 (vec_merge:SSEMODEF2P
11783 (unspec:SSEMODEF2P
11784 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11785 UNSPEC_FRCZ)
11786 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11787 (const_int 1)))]
11788 "TARGET_XOP"
11789 "vfrcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
11790 [(set_attr "type" "ssecvt1")
11791 (set_attr "mode" "<MODE>")])
11792
;; Packed vfrcz on the FMA4MODEF4 vector modes, kept opaque as UNSPEC_FRCZ of
;; operand 1 (register or memory).
11793 (define_insn "xop_frcz<mode>2256"
11794 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x")
11795 (unspec:FMA4MODEF4
11796 [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")]
11797 UNSPEC_FRCZ))]
11798 "TARGET_XOP"
11799 "vfrcz<fma4modesuffixf4>\t{%1, %0|%0, %1}"
11800 [(set_attr "type" "ssecvt1")
11801 (set_attr "mode" "<MODE>")])
11802
;; vpcom<cond>{b,w,d,q}: integer vector compare.  Operand 1 is the
;; comparison operator (accepted by ix86_comparison_int_operator) applied to
;; register operand 2 and register-or-memory operand 3.
;; NOTE(review): %Y1 presumably prints the condition mnemonic for operator 1
;; -- confirm in the operand-printing code.
11803 (define_insn "xop_maskcmp<mode>3"
11804 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11805 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11806 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11807 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11808 "TARGET_XOP"
11809 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11810 [(set_attr "type" "sse4arg")
11811 (set_attr "prefix_data16" "0")
11812 (set_attr "prefix_rep" "0")
11813 (set_attr "prefix_extra" "2")
11814 (set_attr "length_immediate" "1")
11815 (set_attr "mode" "TI")])
11816
;; vpcom<cond>u{b,w,d,q}: unsigned integer vector compare.  Operand 1 is the
;; comparison operator (accepted by ix86_comparison_uns_operator) applied to
;; register operand 2 and register-or-memory operand 3.
11817 (define_insn "xop_maskcmp_uns<mode>3"
11818 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11819 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11820 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11821 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11822 "TARGET_XOP"
11823 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11824 [(set_attr "type" "ssecmp")
11825 (set_attr "prefix_data16" "0")
11826 (set_attr "prefix_rep" "0")
11827 (set_attr "prefix_extra" "2")
11828 (set_attr "length_immediate" "1")
11829 (set_attr "mode" "TI")])
11830
11831 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11832 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11833 ;; the exact instruction generated for the intrinsic.
;; Same unsigned compare as xop_maskcmp_uns<mode>3, but the comparison is
;; wrapped in UNSPEC_XOP_UNSIGNED_CMP so that the RTL is opaque and the
;; instruction is emitted exactly as requested.
11834 (define_insn "xop_maskcmp_uns2<mode>3"
11835 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11836 (unspec:SSEMODE1248
11837 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11838 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11839 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11840 UNSPEC_XOP_UNSIGNED_CMP))]
11841 "TARGET_XOP"
11842 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11843 [(set_attr "type" "ssecmp")
11844 (set_attr "prefix_data16" "0")
11845 (set_attr "prefix_extra" "2")
11846 (set_attr "length_immediate" "1")
11847 (set_attr "mode" "TI")])
11848
11849 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11850 ;; being added here to be complete.
;; vpcomtrue / vpcomfalse: kept opaque as UNSPEC_XOP_TRUEFALSE of operands 1
;; and 2 plus constant operand 3.  The C output code picks the mnemonic:
;; vpcomtrue when operand 3 is non-zero, vpcomfalse when it is zero.
11851 (define_insn "xop_pcom_tf<mode>3"
11852 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11853 (unspec:SSEMODE1248
11854 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11855 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11856 (match_operand:SI 3 "const_int_operand" "n")]
11857 UNSPEC_XOP_TRUEFALSE))]
11858 "TARGET_XOP"
11859 {
11860 return ((INTVAL (operands[3]) != 0)
11861 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11862 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11863 }
11864 [(set_attr "type" "ssecmp")
11865 (set_attr "prefix_data16" "0")
11866 (set_attr "prefix_extra" "2")
11867 (set_attr "length_immediate" "1")
11868 (set_attr "mode" "TI")])
11869
11870 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-operand vaesenc, enabled only when both TARGET_AES and TARGET_AVX
;; hold.  Operand 1 must be a register; operand 2 may be a register or
;; memory.  The operation is represented opaquely as UNSPEC_AESENC.
11871 (define_insn "*avx_aesenc"
11872 [(set (match_operand:V2DI 0 "register_operand" "=x")
11873 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11874 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11875 UNSPEC_AESENC))]
11876 "TARGET_AES && TARGET_AVX"
11877 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11878 [(set_attr "type" "sselog1")
11879 (set_attr "prefix_extra" "1")
11880 (set_attr "prefix" "vex")
11881 (set_attr "mode" "TI")])
11882
;; Two-operand aesenc, gated on TARGET_AES alone.  The "0" constraint ties
;; operand 1 to the destination register, which is why the template prints
;; only %0 and %2.
11883 (define_insn "aesenc"
11884 [(set (match_operand:V2DI 0 "register_operand" "=x")
11885 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11886 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11887 UNSPEC_AESENC))]
11888 "TARGET_AES"
11889 "aesenc\t{%2, %0|%0, %2}"
11890 [(set_attr "type" "sselog1")
11891 (set_attr "prefix_extra" "1")
11892 (set_attr "mode" "TI")])
11893
;; Three-operand vaesenclast, enabled only when both TARGET_AES and
;; TARGET_AVX hold.  Operand 1 must be a register; operand 2 may be a
;; register or memory.  Represented opaquely as UNSPEC_AESENCLAST.
11894 (define_insn "*avx_aesenclast"
11895 [(set (match_operand:V2DI 0 "register_operand" "=x")
11896 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11897 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11898 UNSPEC_AESENCLAST))]
11899 "TARGET_AES && TARGET_AVX"
11900 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11901 [(set_attr "type" "sselog1")
11902 (set_attr "prefix_extra" "1")
11903 (set_attr "prefix" "vex")
11904 (set_attr "mode" "TI")])
11905
;; Two-operand aesenclast, gated on TARGET_AES alone.  Operand 1 is tied to
;; the destination by the "0" constraint, so only %0 and %2 are printed.
11906 (define_insn "aesenclast"
11907 [(set (match_operand:V2DI 0 "register_operand" "=x")
11908 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11909 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11910 UNSPEC_AESENCLAST))]
11911 "TARGET_AES"
11912 "aesenclast\t{%2, %0|%0, %2}"
11913 [(set_attr "type" "sselog1")
11914 (set_attr "prefix_extra" "1")
11915 (set_attr "mode" "TI")])
11916
;; Three-operand vaesdec, enabled only when both TARGET_AES and TARGET_AVX
;; hold.  Operand 1 must be a register; operand 2 may be a register or
;; memory.  Represented opaquely as UNSPEC_AESDEC.
11917 (define_insn "*avx_aesdec"
11918 [(set (match_operand:V2DI 0 "register_operand" "=x")
11919 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11920 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11921 UNSPEC_AESDEC))]
11922 "TARGET_AES && TARGET_AVX"
11923 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11924 [(set_attr "type" "sselog1")
11925 (set_attr "prefix_extra" "1")
11926 (set_attr "prefix" "vex")
11927 (set_attr "mode" "TI")])
11928
;; Two-operand aesdec, gated on TARGET_AES alone.  Operand 1 is tied to the
;; destination by the "0" constraint, so only %0 and %2 are printed.
11929 (define_insn "aesdec"
11930 [(set (match_operand:V2DI 0 "register_operand" "=x")
11931 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11932 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11933 UNSPEC_AESDEC))]
11934 "TARGET_AES"
11935 "aesdec\t{%2, %0|%0, %2}"
11936 [(set_attr "type" "sselog1")
11937 (set_attr "prefix_extra" "1")
11938 (set_attr "mode" "TI")])
11939
;; Three-operand vaesdeclast, enabled only when both TARGET_AES and
;; TARGET_AVX hold.  Operand 1 must be a register; operand 2 may be a
;; register or memory.  Represented opaquely as UNSPEC_AESDECLAST.
11940 (define_insn "*avx_aesdeclast"
11941 [(set (match_operand:V2DI 0 "register_operand" "=x")
11942 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11943 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11944 UNSPEC_AESDECLAST))]
11945 "TARGET_AES && TARGET_AVX"
11946 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11947 [(set_attr "type" "sselog1")
11948 (set_attr "prefix_extra" "1")
11949 (set_attr "prefix" "vex")
11950 (set_attr "mode" "TI")])
11951
;; Two-operand aesdeclast, gated on TARGET_AES alone.  Operand 1 is tied to
;; the destination by the "0" constraint, so only %0 and %2 are printed.
11952 (define_insn "aesdeclast"
11953 [(set (match_operand:V2DI 0 "register_operand" "=x")
11954 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11955 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11956 UNSPEC_AESDECLAST))]
11957 "TARGET_AES"
11958 "aesdeclast\t{%2, %0|%0, %2}"
11959 [(set_attr "type" "sselog1")
11960 (set_attr "prefix_extra" "1")
11961 (set_attr "mode" "TI")])
11962
;; aesimc takes a single source (register or memory) wrapped in
;; UNSPEC_AESIMC.  There is no separate AVX pattern: the template begins
;; with the %v marker and the "prefix" attribute is "maybe_vex".
11963 (define_insn "aesimc"
11964 [(set (match_operand:V2DI 0 "register_operand" "=x")
11965 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11966 UNSPEC_AESIMC))]
11967 "TARGET_AES"
11968 "%vaesimc\t{%1, %0|%0, %1}"
11969 [(set_attr "type" "sselog1")
11970 (set_attr "prefix_extra" "1")
11971 (set_attr "prefix" "maybe_vex")
11972 (set_attr "mode" "TI")])
11973
;; aeskeygenassist takes one vector source (register or memory) and an
;; integer operand that must satisfy const_0_to_255_operand; both are
;; wrapped in UNSPEC_AESKEYGENASSIST.  As with aesimc, one pattern covers
;; both encodings (%v template marker, "prefix" attribute "maybe_vex").
11974 (define_insn "aeskeygenassist"
11975 [(set (match_operand:V2DI 0 "register_operand" "=x")
11976 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11977 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11978 UNSPEC_AESKEYGENASSIST))]
11979 "TARGET_AES"
11980 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11981 [(set_attr "type" "sselog1")
11982 (set_attr "prefix_extra" "1")
11983 (set_attr "length_immediate" "1")
11984 (set_attr "prefix" "maybe_vex")
11985 (set_attr "mode" "TI")])
11986
;; Three-operand vpclmulqdq, enabled only when both TARGET_PCLMUL and
;; TARGET_AVX hold.  Operand 3 is an integer satisfying
;; const_0_to_255_operand and is printed first in the AT&T template.
;; The operation is represented opaquely as UNSPEC_PCLMUL.
11987 (define_insn "*vpclmulqdq"
11988 [(set (match_operand:V2DI 0 "register_operand" "=x")
11989 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11990 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
11991 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11992 UNSPEC_PCLMUL))]
11993 "TARGET_PCLMUL && TARGET_AVX"
11994 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11995 [(set_attr "type" "sselog1")
11996 (set_attr "prefix_extra" "1")
11997 (set_attr "length_immediate" "1")
11998 (set_attr "prefix" "vex")
11999 (set_attr "mode" "TI")])
12000
;; Two-operand pclmulqdq, gated on TARGET_PCLMUL alone.  Operand 1 is tied
;; to the destination by the "0" constraint, so the template prints only
;; the immediate (%3), the second source (%2) and the destination (%0).
12001 (define_insn "pclmulqdq"
12002 [(set (match_operand:V2DI 0 "register_operand" "=x")
12003 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
12004 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12005 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12006 UNSPEC_PCLMUL))]
12007 "TARGET_PCLMUL"
12008 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
12009 [(set_attr "type" "sselog1")
12010 (set_attr "prefix_extra" "1")
12011 (set_attr "length_immediate" "1")
12012 (set_attr "mode" "TI")])
12013
;; Builds the PARALLEL used for vzeroall at expand time: slot 0 holds an
;; UNSPECV_VZEROALL unspec_volatile and each following slot sets one SSE
;; register, viewed as V8SI, to zero.  Sixteen registers are listed when
;; TARGET_64BIT is set, eight otherwise.
(define_expand "avx_vzeroall"
  [(match_par_dup 0 [(const_int 0)])]
  "TARGET_AVX"
{
  const int sse_regs = TARGET_64BIT ? 16 : 8;
  int slot;

  /* One slot for the unspec_volatile marker plus one SET per register.  */
  operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (sse_regs + 1));

  XVECEXP (operands[0], 0, 0)
    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
                               UNSPECV_VZEROALL);

  /* Slot N (N >= 1) zeroes SSE register number N - 1.  */
  for (slot = 1; slot <= sse_regs; slot++)
    XVECEXP (operands[0], 0, slot)
      = gen_rtx_SET (VOIDmode,
                     gen_rtx_REG (V8SImode, SSE_REGNO (slot - 1)),
                     CONST0_RTX (V8SImode));
})
12033
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; leading elements are an UNSPECV_VZEROALL unspec_volatile and a SET of a
;; register to a const0_operand.  Any checking of further elements is left
;; to the predicate, which is defined outside this section.  The template
;; is a bare "vzeroall" with no printed operands; attributes mark it as
;; having no modrm byte and no memory access.
12034 (define_insn "*avx_vzeroall"
12035 [(match_parallel 0 "vzeroall_operation"
12036 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)
12037 (set (match_operand 1 "register_operand" "=x")
12038 (match_operand 2 "const0_operand" "X"))])]
12039 "TARGET_AVX"
12040 "vzeroall"
12041 [(set_attr "type" "sse")
12042 (set_attr "modrm" "0")
12043 (set_attr "memory" "none")
12044 (set_attr "prefix" "vex")
12045 (set_attr "mode" "OI")])
12046
12047 ;; vzeroupper clobbers the upper 128bits of AVX registers.
;; Variant used when TARGET_64BIT is not set: the pattern lists V8SI
;; clobbers for XMM0_REG through XMM7_REG only.  The instruction is an
;; UNSPECV_VZEROUPPER unspec_volatile and prints no operands.
12048 (define_insn "avx_vzeroupper"
12049 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
12050 (clobber (reg:V8SI XMM0_REG))
12051 (clobber (reg:V8SI XMM1_REG))
12052 (clobber (reg:V8SI XMM2_REG))
12053 (clobber (reg:V8SI XMM3_REG))
12054 (clobber (reg:V8SI XMM4_REG))
12055 (clobber (reg:V8SI XMM5_REG))
12056 (clobber (reg:V8SI XMM6_REG))
12057 (clobber (reg:V8SI XMM7_REG))]
12058 "TARGET_AVX && !TARGET_64BIT"
12059 "vzeroupper"
12060 [(set_attr "type" "sse")
12061 (set_attr "modrm" "0")
12062 (set_attr "memory" "none")
12063 (set_attr "prefix" "vex")
12064 (set_attr "mode" "OI")])
12065
;; Variant used when TARGET_64BIT is set: same UNSPECV_VZEROUPPER
;; unspec_volatile and same "vzeroupper" template, but the clobber list
;; covers XMM0_REG through XMM15_REG (all as V8SI).
12066 (define_insn "avx_vzeroupper_rex64"
12067 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
12068 (clobber (reg:V8SI XMM0_REG))
12069 (clobber (reg:V8SI XMM1_REG))
12070 (clobber (reg:V8SI XMM2_REG))
12071 (clobber (reg:V8SI XMM3_REG))
12072 (clobber (reg:V8SI XMM4_REG))
12073 (clobber (reg:V8SI XMM5_REG))
12074 (clobber (reg:V8SI XMM6_REG))
12075 (clobber (reg:V8SI XMM7_REG))
12076 (clobber (reg:V8SI XMM8_REG))
12077 (clobber (reg:V8SI XMM9_REG))
12078 (clobber (reg:V8SI XMM10_REG))
12079 (clobber (reg:V8SI XMM11_REG))
12080 (clobber (reg:V8SI XMM12_REG))
12081 (clobber (reg:V8SI XMM13_REG))
12082 (clobber (reg:V8SI XMM14_REG))
12083 (clobber (reg:V8SI XMM15_REG))]
12084 "TARGET_AVX && TARGET_64BIT"
12085 "vzeroupper"
12086 [(set_attr "type" "sse")
12087 (set_attr "modrm" "0")
12088 (set_attr "memory" "none")
12089 (set_attr "prefix" "vex")
12090 (set_attr "mode" "OI")])
12091
;; Double-precision vpermil expander.  Operand 2 arrives as an integer
;; immediate and is replaced by the equivalent vec_select PARALLEL: bit I
;; of the immediate chooses between the two elements of the pair that
;; contains element I, so the selected index is (bit I) + (I rounded down
;; to an even number).
(define_expand "avx_vpermil<mode>"
  [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
        (vec_select:AVXMODEFDP
          (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
          (match_operand:SI 2 "const_0_to_255_operand" "")))]
  "TARGET_AVX"
{
  const int imm = INTVAL (operands[2]);
  const int nelt = <ssescalarnum>;
  rtx sel[<ssescalarnum>];
  int elt;

  /* elt & ~1 is the index of the first element of ELT's pair
     (0 for elements 0-1, 2 for elements 2-3).  */
  for (elt = 0; elt < nelt; elt++)
    sel[elt] = GEN_INT (((imm >> elt) & 1) + (elt & ~1));

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, sel));
})
12113
;; Single-precision vpermil expander.  Operand 2 arrives as an integer
;; immediate and is replaced by the equivalent vec_select PARALLEL.  The
;; immediate holds four 2-bit fields; field (I mod 4) picks the source for
;; element I from within I's own group of four elements, so for the
;; eight-element mode the same four fields are reused with an offset of 4.
(define_expand "avx_vpermil<mode>"
  [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
        (vec_select:AVXMODEFSP
          (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
          (match_operand:SI 2 "const_0_to_255_operand" "")))]
  "TARGET_AVX"
{
  const int imm = INTVAL (operands[2]);
  const int nelt = <ssescalarnum>;
  rtx sel[<ssescalarnum>];
  int elt;

  /* 2 * (elt & 3) is the bit position of the field for ELT;
     elt & ~3 is the first index of ELT's group (0 or 4).  */
  for (elt = 0; elt < nelt; elt++)
    sel[elt] = GEN_INT (((imm >> (2 * (elt & 3))) & 3) + (elt & ~3));

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, sel));
})
12139
;; Immediate form of vpermilps/vpermilpd expressed as a vec_select whose
;; selector PARALLEL must satisfy avx_vpermilp_<mode>_operand.  At output
;; time the PARALLEL is converted back to an integer immediate.
(define_insn "*avx_vpermilp<mode>"
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
        (vec_select:AVXMODEF2P
          (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
          (match_parallel 2 "avx_vpermilp_<mode>_operand"
            [(match_operand 3 "const_int_operand" "")])))]
  "TARGET_AVX"
{
  /* The immediate is avx_vpermilp_parallel's result minus one; overwrite
     operand 2 with it so that %2 prints the immediate.  */
  operands[2]
    = GEN_INT (avx_vpermilp_parallel (operands[2], <MODE>mode) - 1);
  return "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
12157
;; Variable-control form of vpermilps/vpermilpd: operand 2 is a vector of
;; mode <avxpermvecmode> held in a register or memory instead of an
;; immediate.  The operation is represented opaquely as UNSPEC_VPERMIL,
;; and no length_immediate attribute is set.
12158 (define_insn "avx_vpermilvar<mode>3"
12159 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12160 (unspec:AVXMODEF2P
12161 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12162 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
12163 UNSPEC_VPERMIL))]
12164 "TARGET_AVX"
12165 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
12166 [(set_attr "type" "sselog")
12167 (set_attr "prefix_extra" "1")
12168 (set_attr "prefix" "vex")
12169 (set_attr "mode" "<MODE>")])
12170
;; vperm2f128 with two vector sources (operand 2 may be memory) and an
;; integer operand satisfying const_0_to_255_operand, wrapped in
;; UNSPEC_VPERMIL2F128.  The "mode" attribute is fixed at V8SF for every
;; mode of the AVX256MODE2P iterator.
12171 (define_insn "avx_vperm2f128<mode>3"
12172 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12173 (unspec:AVX256MODE2P
12174 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
12175 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
12176 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12177 UNSPEC_VPERMIL2F128))]
12178 "TARGET_AVX"
12179 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12180 [(set_attr "type" "sselog")
12181 (set_attr "prefix_extra" "1")
12182 (set_attr "length_immediate" "1")
12183 (set_attr "prefix" "vex")
12184 (set_attr "mode" "V8SF")])
12185
;; Scalar broadcast to a four-element vector.  Operand 1 must be a scalar
;; in memory; the result is written as two nested vec_concat levels, each
;; leaf being operand 1 (via match_dup), i.e. four copies of the scalar.
12186 (define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>"
12187 [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x")
12188 (vec_concat:AVXMODEF4P
12189 (vec_concat:<avxhalfvecmode>
12190 (match_operand:<avxscalarmode> 1 "memory_operand" "m")
12191 (match_dup 1))
12192 (vec_concat:<avxhalfvecmode>
12193 (match_dup 1)
12194 (match_dup 1))))]
12195 "TARGET_AVX"
12196 "vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
12197 [(set_attr "type" "ssemov")
12198 (set_attr "prefix_extra" "1")
12199 (set_attr "prefix" "vex")
12200 (set_attr "mode" "<avxscalarmode>")])
12201
;; Broadcast of an SF value in memory to all eight elements of a V8SF
;; register.  The RTL spells this out as three levels of vec_concat
;; (V2SF -> V4SF -> V8SF) whose eight leaves are all operand 1.
12202 (define_insn "avx_vbroadcastss256"
12203 [(set (match_operand:V8SF 0 "register_operand" "=x")
12204 (vec_concat:V8SF
12205 (vec_concat:V4SF
12206 (vec_concat:V2SF
12207 (match_operand:SF 1 "memory_operand" "m")
12208 (match_dup 1))
12209 (vec_concat:V2SF
12210 (match_dup 1)
12211 (match_dup 1)))
12212 (vec_concat:V4SF
12213 (vec_concat:V2SF
12214 (match_dup 1)
12215 (match_dup 1))
12216 (vec_concat:V2SF
12217 (match_dup 1)
12218 (match_dup 1)))))]
12219 "TARGET_AVX"
12220 "vbroadcastss\t{%1, %0|%0, %1}"
12221 [(set_attr "type" "ssemov")
12222 (set_attr "prefix_extra" "1")
12223 (set_attr "prefix" "vex")
12224 (set_attr "mode" "SF")])
12225
;; Broadcast of a half-width vector in memory to both halves of the
;; destination: the result is the vec_concat of operand 1 with itself.
;; The "mode" attribute is fixed at V4SF for both iterator modes.
12226 (define_insn "avx_vbroadcastf128_p<avxmodesuffixf2c>256"
12227 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
12228 (vec_concat:AVX256MODEF2P
12229 (match_operand:<avxhalfvecmode> 1 "memory_operand" "m")
12230 (match_dup 1)))]
12231 "TARGET_AVX"
12232 "vbroadcastf128\t{%1, %0|%0, %1}"
12233 [(set_attr "type" "ssemov")
12234 (set_attr "prefix_extra" "1")
12235 (set_attr "prefix" "vex")
12236 (set_attr "mode" "V4SF")])
12237
;; Expander for vinsertf128.  Operand 3 (0 or 1, per const_0_to_1_operand)
;; selects which half of operand 1 is replaced by operand 2; the work is
;; delegated to the vec_set_lo_<mode> / vec_set_hi_<mode> patterns and no
;; RTL from this template itself is emitted.
(define_expand "avx_vinsertf128<mode>"
  [(match_operand:AVX256MODE 0 "register_operand" "")
   (match_operand:AVX256MODE 1 "register_operand" "")
   (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_0_to_1_operand" "")]
  "TARGET_AVX"
{
  if (INTVAL (operands[3]) == 0)
    emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
                                      operands[2]));
  else if (INTVAL (operands[3]) == 1)
    emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
                                      operands[2]));
  else
    /* The operand predicate admits only 0 and 1.  */
    gcc_unreachable ();

  DONE;
})
12260
;; Four-element modes: the result is operand 2 concatenated with elements
;; 2 and 3 selected from operand 1, i.e. operand 2 becomes the first half
;; and the second half of operand 1 is kept.  Emitted as vinsertf128 with
;; a literal immediate of 0.
12261 (define_insn "vec_set_lo_<mode>"
12262 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12263 (vec_concat:AVX256MODE4P
12264 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12265 (vec_select:<avxhalfvecmode>
12266 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12267 (parallel [(const_int 2) (const_int 3)]))))]
12268 "TARGET_AVX"
12269 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12270 [(set_attr "type" "sselog")
12271 (set_attr "prefix_extra" "1")
12272 (set_attr "length_immediate" "1")
12273 (set_attr "prefix" "vex")
12274 (set_attr "mode" "V8SF")])
12275
;; Four-element modes: the result is elements 0 and 1 selected from
;; operand 1 concatenated with operand 2, i.e. the first half of operand 1
;; is kept and operand 2 becomes the second half.  Emitted as vinsertf128
;; with a literal immediate of 1.
12276 (define_insn "vec_set_hi_<mode>"
12277 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
12278 (vec_concat:AVX256MODE4P
12279 (vec_select:<avxhalfvecmode>
12280 (match_operand:AVX256MODE4P 1 "register_operand" "x")
12281 (parallel [(const_int 0) (const_int 1)]))
12282 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12283 "TARGET_AVX"
12284 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12285 [(set_attr "type" "sselog")
12286 (set_attr "prefix_extra" "1")
12287 (set_attr "length_immediate" "1")
12288 (set_attr "prefix" "vex")
12289 (set_attr "mode" "V8SF")])
12290
;; Eight-element modes: the result is operand 2 concatenated with elements
;; 4 through 7 selected from operand 1, i.e. operand 2 becomes the first
;; half and the second half of operand 1 is kept.  Emitted as vinsertf128
;; with a literal immediate of 0.
12291 (define_insn "vec_set_lo_<mode>"
12292 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12293 (vec_concat:AVX256MODE8P
12294 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
12295 (vec_select:<avxhalfvecmode>
12296 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12297 (parallel [(const_int 4) (const_int 5)
12298 (const_int 6) (const_int 7)]))))]
12299 "TARGET_AVX"
12300 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12301 [(set_attr "type" "sselog")
12302 (set_attr "prefix_extra" "1")
12303 (set_attr "length_immediate" "1")
12304 (set_attr "prefix" "vex")
12305 (set_attr "mode" "V8SF")])
12306
;; Eight-element modes: the result is elements 0 through 3 selected from
;; operand 1 concatenated with operand 2, i.e. the first half of operand 1
;; is kept and operand 2 becomes the second half.  Emitted as vinsertf128
;; with a literal immediate of 1.
12307 (define_insn "vec_set_hi_<mode>"
12308 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
12309 (vec_concat:AVX256MODE8P
12310 (vec_select:<avxhalfvecmode>
12311 (match_operand:AVX256MODE8P 1 "register_operand" "x")
12312 (parallel [(const_int 0) (const_int 1)
12313 (const_int 2) (const_int 3)]))
12314 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
12315 "TARGET_AVX"
12316 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12317 [(set_attr "type" "sselog")
12318 (set_attr "prefix_extra" "1")
12319 (set_attr "length_immediate" "1")
12320 (set_attr "prefix" "vex")
12321 (set_attr "mode" "V8SF")])
12322
;; V16HI: the result is the V8HI operand 2 concatenated with elements 8
;; through 15 selected from operand 1.  Emitted as vinsertf128 with a
;; literal immediate of 0.
12323 (define_insn "vec_set_lo_v16hi"
12324 [(set (match_operand:V16HI 0 "register_operand" "=x")
12325 (vec_concat:V16HI
12326 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12327 (vec_select:V8HI
12328 (match_operand:V16HI 1 "register_operand" "x")
12329 (parallel [(const_int 8) (const_int 9)
12330 (const_int 10) (const_int 11)
12331 (const_int 12) (const_int 13)
12332 (const_int 14) (const_int 15)]))))]
12333 "TARGET_AVX"
12334 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12335 [(set_attr "type" "sselog")
12336 (set_attr "prefix_extra" "1")
12337 (set_attr "length_immediate" "1")
12338 (set_attr "prefix" "vex")
12339 (set_attr "mode" "V8SF")])
12340
;; V16HI: the result is elements 0 through 7 selected from operand 1
;; concatenated with the V8HI operand 2.  Emitted as vinsertf128 with a
;; literal immediate of 1.
12341 (define_insn "vec_set_hi_v16hi"
12342 [(set (match_operand:V16HI 0 "register_operand" "=x")
12343 (vec_concat:V16HI
12344 (vec_select:V8HI
12345 (match_operand:V16HI 1 "register_operand" "x")
12346 (parallel [(const_int 0) (const_int 1)
12347 (const_int 2) (const_int 3)
12348 (const_int 4) (const_int 5)
12349 (const_int 6) (const_int 7)]))
12350 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12351 "TARGET_AVX"
12352 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12353 [(set_attr "type" "sselog")
12354 (set_attr "prefix_extra" "1")
12355 (set_attr "length_immediate" "1")
12356 (set_attr "prefix" "vex")
12357 (set_attr "mode" "V8SF")])
12358
;; V32QI: the result is the V16QI operand 2 concatenated with elements 16
;; through 31 selected from operand 1.  Emitted as vinsertf128 with a
;; literal immediate of 0.
12359 (define_insn "vec_set_lo_v32qi"
12360 [(set (match_operand:V32QI 0 "register_operand" "=x")
12361 (vec_concat:V32QI
12362 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12363 (vec_select:V16QI
12364 (match_operand:V32QI 1 "register_operand" "x")
12365 (parallel [(const_int 16) (const_int 17)
12366 (const_int 18) (const_int 19)
12367 (const_int 20) (const_int 21)
12368 (const_int 22) (const_int 23)
12369 (const_int 24) (const_int 25)
12370 (const_int 26) (const_int 27)
12371 (const_int 28) (const_int 29)
12372 (const_int 30) (const_int 31)]))))]
12373 "TARGET_AVX"
12374 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12375 [(set_attr "type" "sselog")
12376 (set_attr "prefix_extra" "1")
12377 (set_attr "length_immediate" "1")
12378 (set_attr "prefix" "vex")
12379 (set_attr "mode" "V8SF")])
12380
;; V32QI: the result is elements 0 through 15 selected from operand 1
;; concatenated with the V16QI operand 2.  Emitted as vinsertf128 with a
;; literal immediate of 1.
12381 (define_insn "vec_set_hi_v32qi"
12382 [(set (match_operand:V32QI 0 "register_operand" "=x")
12383 (vec_concat:V32QI
12384 (vec_select:V16QI
12385 (match_operand:V32QI 1 "register_operand" "x")
12386 (parallel [(const_int 0) (const_int 1)
12387 (const_int 2) (const_int 3)
12388 (const_int 4) (const_int 5)
12389 (const_int 6) (const_int 7)
12390 (const_int 8) (const_int 9)
12391 (const_int 10) (const_int 11)
12392 (const_int 12) (const_int 13)
12393 (const_int 14) (const_int 15)]))
12394 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12395 "TARGET_AVX"
12396 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12397 [(set_attr "type" "sselog")
12398 (set_attr "prefix_extra" "1")
12399 (set_attr "length_immediate" "1")
12400 (set_attr "prefix" "vex")
12401 (set_attr "mode" "<MODE>")])
12402
;; vmaskmovps/vmaskmovpd load form.  Operand 1 must be memory and operand 2
;; a register (presumably the mask -- confirm against the builtin
;; expander).  The (match_dup 0) inside UNSPEC_MASKLOAD makes the previous
;; contents of the destination an input to the operation.
12403 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
12404 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12405 (unspec:AVXMODEF2P
12406 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
12407 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12408 (match_dup 0)]
12409 UNSPEC_MASKLOAD))]
12410 "TARGET_AVX"
12411 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
12412 [(set_attr "type" "sselog1")
12413 (set_attr "prefix_extra" "1")
12414 (set_attr "prefix" "vex")
12415 (set_attr "mode" "<MODE>")])
12416
;; vmaskmovps/vmaskmovpd store form.  The destination (operand 0) must be
;; memory and both sources must be registers (which one is the mask is not
;; stated here -- confirm against the builtin expander).  The (match_dup 0)
;; inside UNSPEC_MASKSTORE makes the previous memory contents an input to
;; the operation.
12417 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
12418 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
12419 (unspec:AVXMODEF2P
12420 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12421 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12422 (match_dup 0)]
12423 UNSPEC_MASKSTORE))]
12424 "TARGET_AVX"
12425 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
12426 [(set_attr "type" "sselog1")
12427 (set_attr "prefix_extra" "1")
12428 (set_attr "prefix" "vex")
12429 (set_attr "mode" "<MODE>")])
12430
;; Cast from a half-width vector (operand 1) to the full-width mode,
;; modeled as UNSPEC_CAST.  Alternative 0 ties the source to the
;; destination and emits nothing (its "length" attribute is 0);
;; alternative 1 moves the source into the %x0 view of the destination.
(define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
        (unspec:AVX256MODE2P
          [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "0,xm")]
          UNSPEC_CAST))]
  "TARGET_AVX"
{
  /* Source already lives in the destination register: no code needed.  */
  if (which_alternative == 0)
    return "";

  /* Otherwise pick the move that matches the insn's "mode" attribute.  */
  if (which_alternative == 1)
    switch (get_attr_mode (insn))
      {
      case MODE_V8SF:
        return "vmovaps\t{%1, %x0|%x0, %1}";
      case MODE_V4DF:
        return "vmovapd\t{%1, %x0|%x0, %1}";
      case MODE_OI:
        return "vmovdqa\t{%1, %x0|%x0, %1}";
      default:
        break;
      }

  /* Unknown alternative or unexpected mode.  */
  gcc_unreachable ();
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecmode>")
   (set (attr "length")
        (if_then_else (eq_attr "alternative" "0")
                      (const_string "0")
                      (const_string "*")))])
12466
;; Cast from the full-width mode (operand 1) to its half-width vector,
;; modeled as UNSPEC_CAST.  Alternative 0 ties the source to the
;; destination and emits nothing (its "length" attribute is 0);
;; alternative 1 moves the %x1 view of the source into the destination.
(define_insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>"
  [(set (match_operand:<avxhalfvecmode> 0 "register_operand" "=x,x")
        (unspec:<avxhalfvecmode>
          [(match_operand:AVX256MODE2P 1 "nonimmediate_operand" "0,xm")]
          UNSPEC_CAST))]
  "TARGET_AVX"
{
  /* Source already lives in the destination register: no code needed.  */
  if (which_alternative == 0)
    return "";

  /* Otherwise pick the move that matches the insn's "mode" attribute.  */
  if (which_alternative == 1)
    switch (get_attr_mode (insn))
      {
      case MODE_V8SF:
        return "vmovaps\t{%x1, %0|%0, %x1}";
      case MODE_V4DF:
        return "vmovapd\t{%x1, %0|%0, %x1}";
      case MODE_OI:
        return "vmovdqa\t{%x1, %0|%0, %x1}";
      default:
        break;
      }

  /* Unknown alternative or unexpected mode.  */
  gcc_unreachable ();
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecmode>")
   (set (attr "length")
        (if_then_else (eq_attr "alternative" "0")
                      (const_string "0")
                      (const_string "*")))])
12502
;; Vector initialisation for the AVX256MODE modes.  The expander does no
;; work of its own: it hands operand 0 (destination register) and
;; operand 1 (unconstrained -- no predicate is given) to
;; ix86_expand_vector_init with a first argument of false, then finishes
;; with DONE so the pattern's own RTL template is not emitted.
12503 (define_expand "vec_init<mode>"
12504 [(match_operand:AVX256MODE 0 "register_operand" "")
12505 (match_operand 1 "" "")]
12506 "TARGET_AVX"
12507 {
12508 ix86_expand_vector_init (false, operands[0], operands[1]);
12509 DONE;
12510 })
12511
;; Concatenation of two half-width vectors into a full-width register.
;; Alternative 0 (operand 2 in a register or memory) inserts operand 2
;; with vinsertf128 and immediate 1, printing operand 1 through %t1.
;; Alternative 1 (operand 2 matches constraint "C") emits only a move of
;; operand 1 into the %x0 view of the destination.
(define_insn "*vec_concat<mode>_avx"
  [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
        (vec_concat:AVX256MODE
          (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
          (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
  "TARGET_AVX"
{
  if (which_alternative == 0)
    return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";

  if (which_alternative != 1)
    gcc_unreachable ();

  /* Alternative 1: choose the move by the insn's "mode" attribute;
     anything other than V8SF/V4DF uses the integer move.  */
  switch (get_attr_mode (insn))
    {
    case MODE_V8SF:
      return "vmovaps\t{%1, %x0|%x0, %1}";
    case MODE_V4DF:
      return "vmovapd\t{%1, %x0|%x0, %1}";
    default:
      return "vmovdqa\t{%1, %x0|%x0, %1}";
    }
}
  [(set_attr "type" "sselog,ssemov")
   (set_attr "prefix_extra" "1,*")
   (set_attr "length_immediate" "1,*")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecmode>")])