sse.md (*avx_<umaxmin:code><mode>3): Split from *avx_<maxmin:code><mode>3.
[gcc.git] / gcc / config / i386 / sse.md
1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 ;; Free Software Foundation, Inc.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
;; Code attribute and mode iterators shared by the patterns in this file.
;; Each iterator names a list of machine modes; a pattern written with an
;; iterator is instantiated once per listed mode.
22 ;; Instruction suffix for sign and zero extensions.
23 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
24
25 ;; 16 byte integral modes handled by SSE
26 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
27
28 ;; All 16-byte vector modes handled by SSE
29 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; SSEMODE16 is the SSEMODE list with V1TI added.
30 (define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
31
32 ;; 32 byte integral vector modes handled by AVX
33 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
34
35 ;; All 32-byte vector modes handled by AVX
36 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
37
38 ;; All QI vector modes handled by AVX
39 (define_mode_iterator AVXMODEQI [V32QI V16QI])
40
41 ;; All DI vector modes handled by AVX
42 (define_mode_iterator AVXMODEDI [V4DI V2DI])
43
44 ;; All vector modes handled by AVX
45 (define_mode_iterator AVXMODE
46 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; AVXMODE16 is the AVXMODE list with V1TI added.
47 (define_mode_iterator AVXMODE16
48 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
49
50 ;; Mix-n-match
;; Subsets of the 16-byte modes; the digits in each name match the element
;; sizes in bytes of the listed modes (1 = QI, 2 = HI, 4 = SI, 8 = DI).
51 (define_mode_iterator SSEMODE12 [V16QI V8HI])
52 (define_mode_iterator SSEMODE24 [V8HI V4SI])
53 (define_mode_iterator SSEMODE14 [V16QI V4SI])
54 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
55 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
56 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Floating-point subsets: scalar plus 16-byte vector, 32-byte vector only,
;; and 16-byte vector only.
57 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
58 (define_mode_iterator FMA4MODEF4 [V8SF V4DF])
59 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
60
;; Subsets of the AVX vector modes used by individual pattern families.
61 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
62 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
63 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
64 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
65 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
66 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
67 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
68 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
69 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
70 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
71 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
72
73 ;; Int-float size matches
74 (define_mode_iterator SSEMODE4S [V4SF V4SI])
75 (define_mode_iterator SSEMODE2D [V2DF V2DI])
76
77 ;; Modes handled by integer vcond pattern
;; V2DI is only included when TARGET_SSE4_2 holds.
78 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
79 (V2DI "TARGET_SSE4_2")])
80
81 ;; Modes handled by vec_extract_even/odd pattern.
;; Every entry carries its own enabling condition.
82 (define_mode_iterator SSEMODE_EO
83 [(V4SF "TARGET_SSE")
84 (V2DF "TARGET_SSE2")
85 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
86 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
87 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
88
;; Mode attributes: per-mode string substitutions referenced as <name>
;; inside pattern names, templates and attribute values below.
89 ;; Mapping from float mode to required SSE level
90 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
91
92 ;; Mapping from integer vector mode to mnemonic suffix
93 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
94
95 ;; Mapping of the insn mnemonic suffix
;; Note that the integer modes V8SI and V4DI map to "ps" and "pd" as well.
96 (define_mode_attr ssemodesuffix
97 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd") (V8SF "ps") (V4DF "pd")
98 (V8SI "ps") (V4DI "pd")])
;; Scalar form of the suffix: vector float modes map to "ss"/"sd";
;; V4SI maps to "d".
99 (define_mode_attr ssescalarmodesuffix
100 [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd") (V8SF "ss") (V4DF "sd")
101 (V4SI "d")])
102
103 ;; Mapping of the max integer size for xop rotate immediate constraint
104 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
105
106 ;; Mapping of vector modes back to the scalar modes
107 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
108 (V16QI "QI") (V8HI "HI")
109 (V4SI "SI") (V2DI "DI")])
110
111 ;; Mapping of vector modes to a vector mode of double size
112 (define_mode_attr ssedoublesizemode
113 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
114 (V8HI "V16HI") (V16QI "V32QI")
115 (V4DF "V8DF") (V8SF "V16SF")
116 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
117
118 ;; Number of scalar elements in each vector type
119 (define_mode_attr ssescalarnum
120 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
121 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
122
123 ;; Mapping for AVX
;; avxvecmode: value used for the insn "mode" attribute.  Integer vectors
;; map to TI (16-byte) or OI (32-byte); float vectors map to themselves.
124 (define_mode_attr avxvecmode
125 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
126 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
127 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
;; avxvecpsmode: integer vector mode -> SF vector mode of the same total
;; size (V4SF for the 16-byte modes, V8SF for the 32-byte modes).
128 (define_mode_attr avxvecpsmode
129 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
130 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; avxhalfvecmode: vector mode with the same element type and half as
;; many elements.
131 (define_mode_attr avxhalfvecmode
132 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
133 (V8SF "V4SF") (V4DF "V2DF")
134 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
;; avxscalarmode: element mode of each 16- and 32-byte vector mode.
135 (define_mode_attr avxscalarmode
136 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
137 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
;; avxcvtvecmode: SF vector <-> SI vector with the same element count.
138 (define_mode_attr avxcvtvecmode
139 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; avxpermvecmode: float vector -> integer vector with the same element
;; count and element width.
140 (define_mode_attr avxpermvecmode
141 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; avxmodesuffixp: "pd" / "ps" / "si" by element type.
142 (define_mode_attr avxmodesuffixp
143 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
144 (V4DF "pd")])
;; avxmodesuffix: "256" for the 32-byte modes, empty for the 16-byte modes.
145 (define_mode_attr avxmodesuffix
146 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
147 (V8SI "256") (V8SF "256") (V4DF "256")])
148
149 ;; Mapping of immediate bits for blend instructions
150 (define_mode_attr blendbits
151 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
152
153 ;; Mapping of immediate bits for pinsr instructions
154 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
155
156 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
157
158 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
159 ;;
160 ;; Move patterns
161 ;;
162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
163
;; Move expander for the 32-byte AVX vector modes (AVX256MODE).  Enabled
;; under TARGET_AVX; the RTL template is never emitted because the body
;; hands the operands to ix86_expand_vector_move and finishes with DONE.
164 (define_expand "mov<mode>"
165 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
166 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
167 "TARGET_AVX"
168 {
169 ix86_expand_vector_move (<MODE>mode, operands);
170 DONE;
171 })
172
;; AVX move insn for every mode in AVXMODE16.
;;   alternative 0: SSE constant ("C") -> register
;;   alternative 1: register or memory -> register
;;   alternative 2: register -> memory
;; The insn condition requires at least one operand to be a register.
(define_insn "*avx_mov<mode>_internal"
  [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
        (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
  "TARGET_AVX
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  /* Constant load: the helper selects the opcode.  */
  if (which_alternative == 0)
    return standard_sse_constant_opcode (insn, operands[1]);

  /* Only the load (1) and store (2) alternatives remain.  */
  if (which_alternative != 1 && which_alternative != 2)
    gcc_unreachable ();

  /* When packed-single instructions are preferred, every mode is moved
     with vmovaps; the single-precision modes use vmovaps regardless.  */
  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
    return "vmovaps\t{%1, %0|%0, %1}";

  switch (get_attr_mode (insn))
    {
    case MODE_V8SF:
    case MODE_V4SF:
      return "vmovaps\t{%1, %0|%0, %1}";

    case MODE_V4DF:
    case MODE_V2DF:
      return "vmovapd\t{%1, %0|%0, %1}";

    default:
      /* All remaining (integer) insn modes.  */
      return "vmovdqa\t{%1, %0|%0, %1}";
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecmode>")])
210
211 ;; All of these patterns are enabled for SSE1 as well as SSE2.
212 ;; This is essential for maintaining stable calling conventions.
213
;; Move expander for the 16-byte vector modes (SSEMODE16).  Enabled under
;; TARGET_SSE; the body delegates to ix86_expand_vector_move and ends with
;; DONE, so the RTL template itself is never emitted.
214 (define_expand "mov<mode>"
215 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
216 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
217 "TARGET_SSE"
218 {
219 ix86_expand_vector_move (<MODE>mode, operands);
220 DONE;
221 })
222
;; SSE move insn for every mode in SSEMODE16.
;;   alternative 0: SSE constant ("C") -> register
;;   alternative 1: register or memory -> register
;;   alternative 2: register -> memory
;; The insn condition requires at least one operand to be a register.
;; The opcode is chosen from the insn's "mode" attribute, computed below.
(define_insn "*mov<mode>_internal"
  [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
        (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
  "TARGET_SSE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  /* Constant load: the helper selects the opcode.  */
  if (which_alternative == 0)
    return standard_sse_constant_opcode (insn, operands[1]);

  /* Only the load (1) and store (2) alternatives remain.  */
  if (which_alternative != 1 && which_alternative != 2)
    gcc_unreachable ();

  /* When packed-single instructions are preferred, every mode is moved
     with movaps; the V4SF insn mode uses movaps regardless.  */
  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
    return "movaps\t{%1, %0|%0, %1}";

  switch (get_attr_mode (insn))
    {
    case MODE_V4SF:
      return "movaps\t{%1, %0|%0, %1}";

    case MODE_V2DF:
      return "movapd\t{%1, %0|%0, %1}";

    default:
      /* All remaining insn modes.  */
      return "movdqa\t{%1, %0|%0, %1}";
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   ;; Insn mode: V4SF when optimizing the function for size, when SSE2 is
   ;; not available, or for the store alternative under
   ;; TARGET_SSE_TYPELESS_STORES; otherwise V4SF / V2DF for those two
   ;; machine modes and TI for everything else.
   (set (attr "mode")
        (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
                         (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
                    (and (eq_attr "alternative" "2")
                         (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
                             (const_int 0))))
                 (const_string "V4SF")
               (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
                 (const_string "V4SF")
               (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
                 (const_string "V2DF")
              ]
          (const_string "TI")))])
269
;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
;; from memory, we'd prefer to load the memory directly into the %xmm
;; register.  To facilitate this happy circumstance, this pattern won't
;; split until after register allocation.  If the 64-bit value didn't
;; come from memory, this is the best we can do.  This is much better
;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; from there.
;;
;; Operand 0 is the V4SI destination, operand 1 the DI source (general
;; register pair or memory) and operand 2 a V4SI scratch that is only
;; needed by the register alternative.  The split emits all of its insns
;; itself, so it must finish with DONE; otherwise the "(const_int 0)"
;; replacement template would be emitted after them as an extra insn.

(define_insn_and_split "movdi_to_sse"
  [(parallel
    [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
          (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
     (clobber (match_scratch:V4SI 2 "=&x,X"))])]
  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[1], DImode))
    {
      /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
         Assemble the 64-bit DImode value in an xmm register: load the
         SImode subreg at byte 0 into the destination and the one at byte 4
         into the scratch, then interleave the two.  */
      emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
                                  gen_rtx_SUBREG (SImode, operands[1], 0)));
      emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
                                  gen_rtx_SUBREG (SImode, operands[1], 4)));
      emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
                                             operands[2]));
    }
  else if (memory_operand (operands[1], DImode))
    /* Memory source: one vec_concat of the DI value with zero.  */
    emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
                                   operands[1], const0_rtx));
  else
    gcc_unreachable ();

  /* Everything has been emitted above; do not fall through to the
     replacement template.  */
  DONE;
})
306
;; After reload, rewrite a V4SF register set whose source satisfies
;; zero_extended_scalar_load_operand.  Operand 1 is replaced by its SFmode
;; subreg at offset 0 and operand 2 becomes the V4SF zero vector; the new
;; pattern merges the duplicated scalar into the zero vector with mask 1.
307 (define_split
308 [(set (match_operand:V4SF 0 "register_operand" "")
309 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
310 "TARGET_SSE && reload_completed"
311 [(set (match_dup 0)
312 (vec_merge:V4SF
313 (vec_duplicate:V4SF (match_dup 1))
314 (match_dup 2)
315 (const_int 1)))]
316 {
317 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
318 operands[2] = CONST0_RTX (V4SFmode);
319 })
320
;; V2DF counterpart of the split above: after reload, operand 1 is replaced
;; by its DFmode subreg at offset 0 and operand 2 by the DFmode zero; the
;; new pattern is a vec_concat of the two.
321 (define_split
322 [(set (match_operand:V2DF 0 "register_operand" "")
323 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
324 "TARGET_SSE2 && reload_completed"
325 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
326 {
327 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
328 operands[2] = CONST0_RTX (DFmode);
329 })
330
;; Push expander for a register in one of the 32-byte AVX vector modes.
;; Enabled under TARGET_AVX; delegates to ix86_expand_push.
331 (define_expand "push<mode>1"
332 [(match_operand:AVX256MODE 0 "register_operand" "")]
333 "TARGET_AVX"
334 {
335 ix86_expand_push (<MODE>mode, operands[0]);
336 DONE;
337 })
338
;; Push expander for a register in one of the 16-byte vector modes.
;; Enabled under TARGET_SSE; delegates to ix86_expand_push.
339 (define_expand "push<mode>1"
340 [(match_operand:SSEMODE16 0 "register_operand" "")]
341 "TARGET_SSE"
342 {
343 ix86_expand_push (<MODE>mode, operands[0]);
344 DONE;
345 })
346
;; Misaligned-move expander for the 32-byte AVX vector modes.  Enabled
;; under TARGET_AVX; delegates to ix86_expand_vector_move_misalign.
347 (define_expand "movmisalign<mode>"
348 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
349 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
350 "TARGET_AVX"
351 {
352 ix86_expand_vector_move_misalign (<MODE>mode, operands);
353 DONE;
354 })
355
;; Misaligned-move expander for the 16-byte vector modes.  Enabled under
;; TARGET_SSE; delegates to ix86_expand_vector_move_misalign.
356 (define_expand "movmisalign<mode>"
357 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
358 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
359 "TARGET_SSE"
360 {
361 ix86_expand_vector_move_misalign (<MODE>mode, operands);
362 DONE;
363 })
364
;; AVX vmovups / vmovupd for the 16- and 32-byte float vector modes,
;; modelled as UNSPEC_MOVU.  Alternatives: reg/mem -> reg and reg -> mem;
;; the condition additionally rejects memory-to-memory operands.
365 (define_insn "avx_movu<ssemodesuffix><avxmodesuffix>"
366 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
367 (unspec:AVXMODEF2P
368 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
369 UNSPEC_MOVU))]
370 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
371 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
372 "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
373 [(set_attr "type" "ssemov")
374 (set_attr "movu" "1")
375 (set_attr "prefix" "vex")
376 (set_attr "mode" "<MODE>")])
377
;; movq (vmovq when the "%v" prefix expands): the result is element 0 of
;; operand 1 concatenated with a zero upper half.  Requires TARGET_SSE2.
378 (define_insn "sse2_movq128"
379 [(set (match_operand:V2DI 0 "register_operand" "=x")
380 (vec_concat:V2DI
381 (vec_select:DI
382 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
383 (parallel [(const_int 0)]))
384 (const_int 0)))]
385 "TARGET_SSE2"
386 "%vmovq\t{%1, %0|%0, %1}"
387 [(set_attr "type" "ssemov")
388 (set_attr "prefix" "maybe_vex")
389 (set_attr "mode" "TI")])
390
;; SSE movups / movupd for V4SF and V2DF, modelled as UNSPEC_MOVU.
;; Alternatives: reg/mem -> reg and reg -> mem; the condition additionally
;; rejects memory-to-memory operands.
391 (define_insn "<sse>_movu<ssemodesuffix>"
392 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
393 (unspec:SSEMODEF2P
394 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
395 UNSPEC_MOVU))]
396 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
397 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
398 "movu<ssemodesuffix>\t{%1, %0|%0, %1}"
399 [(set_attr "type" "ssemov")
400 (set_attr "movu" "1")
401 (set_attr "mode" "<MODE>")])
402
;; AVX vmovdqu for V16QI and V32QI, modelled as UNSPEC_MOVU.
;; Alternatives: reg/mem -> reg and reg -> mem; memory-to-memory is
;; rejected by the condition.
403 (define_insn "avx_movdqu<avxmodesuffix>"
404 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
405 (unspec:AVXMODEQI
406 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
407 UNSPEC_MOVU))]
408 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
409 "vmovdqu\t{%1, %0|%0, %1}"
410 [(set_attr "type" "ssemov")
411 (set_attr "movu" "1")
412 (set_attr "prefix" "vex")
413 (set_attr "mode" "<avxvecmode>")])
414
;; SSE2 movdqu for V16QI, modelled as UNSPEC_MOVU.  Alternatives:
;; reg/mem -> reg and reg -> mem; memory-to-memory is rejected by the
;; condition.
415 (define_insn "sse2_movdqu"
416 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
417 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
418 UNSPEC_MOVU))]
419 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
420 "movdqu\t{%1, %0|%0, %1}"
421 [(set_attr "type" "ssemov")
422 (set_attr "movu" "1")
423 (set_attr "prefix_data16" "1")
424 (set_attr "mode" "TI")])
425
;; AVX vmovntps / vmovntpd: store of a float vector register to memory,
;; modelled as UNSPEC_MOVNT, for the 16- and 32-byte float vector modes.
426 (define_insn "avx_movnt<mode>"
427 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
428 (unspec:AVXMODEF2P
429 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
430 UNSPEC_MOVNT))]
431 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
432 "vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
433 [(set_attr "type" "ssemov")
434 (set_attr "prefix" "vex")
435 (set_attr "mode" "<MODE>")])
436
;; SSE movntps / movntpd: store of a V4SF or V2DF register to memory,
;; modelled as UNSPEC_MOVNT.
437 (define_insn "<sse>_movnt<mode>"
438 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
439 (unspec:SSEMODEF2P
440 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
441 UNSPEC_MOVNT))]
442 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
443 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
444 [(set_attr "type" "ssemov")
445 (set_attr "mode" "<MODE>")])
446
;; AVX vmovntdq: store of a V2DI or V4DI register to memory, modelled as
;; UNSPEC_MOVNT.
;; NOTE(review): the insn type here is "ssecvt", whereas sse2_movntv2di
;; uses "ssemov" for the corresponding store -- confirm this is intended.
447 (define_insn "avx_movnt<mode>"
448 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
449 (unspec:AVXMODEDI
450 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
451 UNSPEC_MOVNT))]
452 "TARGET_AVX"
453 "vmovntdq\t{%1, %0|%0, %1}"
454 [(set_attr "type" "ssecvt")
455 (set_attr "prefix" "vex")
456 (set_attr "mode" "<avxvecmode>")])
457
;; SSE2 movntdq: store of a V2DI register to memory, modelled as
;; UNSPEC_MOVNT.
458 (define_insn "sse2_movntv2di"
459 [(set (match_operand:V2DI 0 "memory_operand" "=m")
460 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
461 UNSPEC_MOVNT))]
462 "TARGET_SSE2"
463 "movntdq\t{%1, %0|%0, %1}"
464 [(set_attr "type" "ssemov")
465 (set_attr "prefix_data16" "1")
466 (set_attr "mode" "TI")])
467
;; SSE2 movnti: store of an SImode general register ("r") to memory,
;; modelled as UNSPEC_MOVNT.
;; The "mode" attribute is SI, matching the SImode operands; it was
;; previously the unrelated vector mode V2DF.  prefix_data16 is still set
;; to 0 explicitly, so the prefix accounting does not depend on the
;; mode-based default.
(define_insn "sse2_movntsi"
  [(set (match_operand:SI 0 "memory_operand" "=m")
        (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movnti\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "0")
   (set_attr "mode" "SI")])
477
;; AVX vlddqu: load of a V16QI or V32QI value from memory into a register,
;; modelled as UNSPEC_LDDQU.
;; The insn type is "ssemov", the same classification sse3_lddqu gives the
;; identical operation (it was "ssecvt" here, although nothing is
;; converted).
(define_insn "avx_lddqu<avxmodesuffix>"
  [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
        (unspec:AVXMODEQI
          [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
          UNSPEC_LDDQU))]
  "TARGET_AVX"
  "vlddqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecmode>")])
489
;; SSE3 lddqu: load of a V16QI value from memory into a register,
;; modelled as UNSPEC_LDDQU.  The attributes declare no data16 prefix and
;; a rep prefix.
490 (define_insn "sse3_lddqu"
491 [(set (match_operand:V16QI 0 "register_operand" "=x")
492 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
493 UNSPEC_LDDQU))]
494 "TARGET_SSE3"
495 "lddqu\t{%1, %0|%0, %1}"
496 [(set_attr "type" "ssemov")
497 (set_attr "movu" "1")
498 (set_attr "prefix_data16" "0")
499 (set_attr "prefix_rep" "1")
500 (set_attr "mode" "TI")])
501
502 ; Expand patterns for non-temporal stores. At the moment, only those
503 ; that directly map to insns are defined; it would be possible to
504 ; define patterns for other modes that would expand to several insns.
505
;; storent for V4SF / V2DF.  There is no preparation code, so the
;; UNSPEC_MOVNT template itself is emitted.
506 (define_expand "storent<mode>"
507 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
508 (unspec:SSEMODEF2P
509 [(match_operand:SSEMODEF2P 1 "register_operand" "")]
510 UNSPEC_MOVNT))]
511 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)")
512
;; storent for the modes of the MODEF iterator (defined outside this part
;; of the file), enabled under TARGET_SSE4A.  No preparation code: the
;; UNSPEC_MOVNT template itself is emitted.
513 (define_expand "storent<mode>"
514 [(set (match_operand:MODEF 0 "memory_operand" "")
515 (unspec:MODEF
516 [(match_operand:MODEF 1 "register_operand" "")]
517 UNSPEC_MOVNT))]
518 "TARGET_SSE4A")
519
;; storent for V2DI, enabled under TARGET_SSE2.  The emitted template has
;; the same shape as the sse2_movntv2di insn pattern.
520 (define_expand "storentv2di"
521 [(set (match_operand:V2DI 0 "memory_operand" "")
522 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
523 UNSPEC_MOVNT))]
524 "TARGET_SSE2")
525
;; storent for SImode, enabled under TARGET_SSE2.  The emitted template
;; has the same shape as the sse2_movntsi insn pattern.
526 (define_expand "storentsi"
527 [(set (match_operand:SI 0 "memory_operand" "")
528 (unspec:SI [(match_operand:SI 1 "register_operand" "")]
529 UNSPEC_MOVNT))]
530 "TARGET_SSE2")
531
532 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
533 ;;
534 ;; Parallel floating point arithmetic
535 ;;
536 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
537
;; Expander for the codes of the absneg iterator on V4SF / V2DF.  The
;; whole expansion is done by ix86_expand_fp_absneg_operator.
538 (define_expand "<code><mode>2"
539 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
540 (absneg:SSEMODEF2P
541 (match_operand:SSEMODEF2P 1 "register_operand" "")))]
542 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
543 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
544
;; Add/subtract expander for V8SF / V4DF.  The operands are first passed
;; through ix86_fixup_binary_operands_no_copy; there is no DONE, so the
;; template is then emitted.
545 (define_expand "<plusminus_insn><mode>3"
546 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
547 (plusminus:AVX256MODEF2P
548 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
549 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
550 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
551 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
552
;; Three-operand AVX packed add/subtract for the 16- and 32-byte float
;; vector modes.  The destination is a separate register and operand 2 may
;; be in memory.
553 (define_insn "*avx_<plusminus_insn><mode>3"
554 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
555 (plusminus:AVXMODEF2P
556 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
557 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
558 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
559 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
560 "v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
561 [(set_attr "type" "sseadd")
562 (set_attr "prefix" "vex")
563 (set_attr "mode" "<avxvecmode>")])
564
;; Add/subtract expander for V4SF / V2DF.  The operands are first passed
;; through ix86_fixup_binary_operands_no_copy; there is no DONE, so the
;; template is then emitted.
565 (define_expand "<plusminus_insn><mode>3"
566 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
567 (plusminus:SSEMODEF2P
568 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
569 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
570 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
571 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
572
;; Two-operand SSE packed add/subtract for V4SF / V2DF.  Operand 1 is tied
;; to the destination (constraint "0"); operand 2 may be in memory.
573 (define_insn "*<plusminus_insn><mode>3"
574 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
575 (plusminus:SSEMODEF2P
576 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
577 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
578 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
579 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
580 "<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}"
581 [(set_attr "type" "sseadd")
582 (set_attr "mode" "<MODE>")])
583
;; AVX scalar add/subtract (ss/sd suffix) on the 16-byte float vector
;; modes.  The vec_merge with mask 1 takes element 0 from the arithmetic
;; result and the remaining elements from operand 1.
584 (define_insn "*avx_vm<plusminus_insn><mode>3"
585 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
586 (vec_merge:SSEMODEF2P
587 (plusminus:SSEMODEF2P
588 (match_operand:SSEMODEF2P 1 "register_operand" "x")
589 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
590 (match_dup 1)
591 (const_int 1)))]
592 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
593 "v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
594 [(set_attr "type" "sseadd")
595 (set_attr "prefix" "vex")
596 (set_attr "mode" "<ssescalarmode>")])
597
;; SSE scalar add/subtract (ss/sd suffix) on V4SF / V2DF.  The vec_merge
;; with mask 1 takes element 0 from the arithmetic result and the
;; remaining elements from operand 1, which is tied to the destination.
598 (define_insn "<sse>_vm<plusminus_insn><mode>3"
599 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
600 (vec_merge:SSEMODEF2P
601 (plusminus:SSEMODEF2P
602 (match_operand:SSEMODEF2P 1 "register_operand" "0")
603 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
604 (match_dup 1)
605 (const_int 1)))]
606 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
607 "<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
608 [(set_attr "type" "sseadd")
609 (set_attr "mode" "<ssescalarmode>")])
610
;; Multiply expander for V8SF / V4DF.  The operands are first passed
;; through ix86_fixup_binary_operands_no_copy; there is no DONE, so the
;; template is then emitted.
611 (define_expand "mul<mode>3"
612 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
613 (mult:AVX256MODEF2P
614 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
615 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
616 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
617 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
618
;; Three-operand AVX packed multiply for the 16- and 32-byte float vector
;; modes.  Operands 1 and 2 are marked commutative ("%"); operand 2 may be
;; in memory.
619 (define_insn "*avx_mul<mode>3"
620 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
621 (mult:AVXMODEF2P
622 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
623 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
624 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
625 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
626 "vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
627 [(set_attr "type" "ssemul")
628 (set_attr "prefix" "vex")
629 (set_attr "mode" "<avxvecmode>")])
630
;; Multiply expander for V4SF / V2DF.  The operands are first passed
;; through ix86_fixup_binary_operands_no_copy; there is no DONE, so the
;; template is then emitted.
631 (define_expand "mul<mode>3"
632 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
633 (mult:SSEMODEF2P
634 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
635 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
636 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
637 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
638
;; Two-operand SSE packed multiply for V4SF / V2DF.  Operand 1 is tied to
;; the destination and marked commutative ("%0"); operand 2 may be in
;; memory.
639 (define_insn "*mul<mode>3"
640 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
641 (mult:SSEMODEF2P
642 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
643 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
644 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
645 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
646 "mul<ssemodesuffix>\t{%2, %0|%0, %2}"
647 [(set_attr "type" "ssemul")
648 (set_attr "mode" "<MODE>")])
649
;; AVX scalar multiply (vmulss / vmulsd) on the 16-byte float vector modes.
;; The vec_merge with mask 1 takes element 0 from the product and the
;; remaining elements from operand 1.
;; NOTE(review): the condition uses AVX_VEC_FLOAT_MODE_P, whereas the
;; sibling *avx_vm<plusminus_insn><mode>3 and *avx_vmdiv<mode>3 patterns
;; use AVX128_VEC_FLOAT_MODE_P -- presumably equivalent for the SSEMODEF2P
;; modes; confirm against the macro definitions.
650 (define_insn "*avx_vmmul<mode>3"
651 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
652 (vec_merge:SSEMODEF2P
653 (mult:SSEMODEF2P
654 (match_operand:SSEMODEF2P 1 "register_operand" "x")
655 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
656 (match_dup 1)
657 (const_int 1)))]
658 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
659 "vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
660 [(set_attr "type" "ssemul")
661 (set_attr "prefix" "vex")
662 (set_attr "mode" "<ssescalarmode>")])
663
;; SSE scalar multiply (mulss / mulsd) on V4SF / V2DF.  The vec_merge with
;; mask 1 takes element 0 from the product and the remaining elements from
;; operand 1, which is tied to the destination.
664 (define_insn "<sse>_vmmul<mode>3"
665 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
666 (vec_merge:SSEMODEF2P
667 (mult:SSEMODEF2P
668 (match_operand:SSEMODEF2P 1 "register_operand" "0")
669 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
670 (match_dup 1)
671 (const_int 1)))]
672 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
673 "mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
674 [(set_attr "type" "ssemul")
675 (set_attr "mode" "<ssescalarmode>")])
676
;; V8SF division expander (TARGET_AVX).  The operands are first fixed up
;; with ix86_fixup_binary_operands_no_copy.  When SSE math and TARGET_RECIP
;; are enabled, the insn is not being optimized for size, and the
;; finite-math-only, no-trapping-math and unsafe-math flags all hold, the
;; division is emitted by ix86_emit_swdivsf instead of the div template.
(define_expand "divv8sf3"
  [(set (match_operand:V8SF 0 "register_operand" "")
        (div:V8SF (match_operand:V8SF 1 "register_operand" "")
                  (match_operand:V8SF 2 "nonimmediate_operand" "")))]
  "TARGET_AVX"
{
  bool use_swdiv;

  ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);

  use_swdiv = (TARGET_SSE_MATH
               && TARGET_RECIP
               && !optimize_insn_for_size_p ()
               && flag_finite_math_only
               && !flag_trapping_math
               && flag_unsafe_math_optimizations);

  if (use_swdiv)
    {
      ix86_emit_swdivsf (operands[0], operands[1], operands[2], V8SFmode);
      DONE;
    }
})
694
;; V4DF division expander (TARGET_AVX).  The operands are passed through
;; ix86_fixup_binary_operands_no_copy and the div template is emitted.
695 (define_expand "divv4df3"
696 [(set (match_operand:V4DF 0 "register_operand" "")
697 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
698 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
699 "TARGET_AVX"
700 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
701
;; Three-operand AVX packed divide (vdivps / vdivpd) for the 16- and
;; 32-byte float vector modes.  Operand 1 must be a register; operand 2
;; may be in memory.
702 (define_insn "avx_div<mode>3"
703 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
704 (div:AVXMODEF2P
705 (match_operand:AVXMODEF2P 1 "register_operand" "x")
706 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
707 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
708 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
709 [(set_attr "type" "ssediv")
710 (set_attr "prefix" "vex")
711 (set_attr "mode" "<MODE>")])
712
;; V4SF division expander (TARGET_SSE).  When SSE math and TARGET_RECIP are
;; enabled, the insn is being optimized for speed, and the
;; finite-math-only, no-trapping-math and unsafe-math flags all hold, the
;; division is emitted by ix86_emit_swdivsf; otherwise the div template is
;; used.
(define_expand "divv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (div:V4SF (match_operand:V4SF 1 "register_operand" "")
                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  const bool use_swdiv = (TARGET_SSE_MATH
                          && TARGET_RECIP
                          && optimize_insn_for_speed_p ()
                          && flag_finite_math_only
                          && !flag_trapping_math
                          && flag_unsafe_math_optimizations);

  if (use_swdiv)
    {
      ix86_emit_swdivsf (operands[0], operands[1], operands[2], V4SFmode);
      DONE;
    }
})
728
;; V2DF division expander (TARGET_SSE2).  No preparation code: the div
;; template itself is emitted.
729 (define_expand "divv2df3"
730 [(set (match_operand:V2DF 0 "register_operand" "")
731 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
732 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
733 "TARGET_SSE2")
734
;; Three-operand AVX packed divide restricted to the 16-byte float vector
;; modes (V4SF / V2DF).  Operand 2 may be in memory.
735 (define_insn "*avx_div<mode>3"
736 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
737 (div:SSEMODEF2P
738 (match_operand:SSEMODEF2P 1 "register_operand" "x")
739 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
740 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
741 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
742 [(set_attr "type" "ssediv")
743 (set_attr "prefix" "vex")
744 (set_attr "mode" "<MODE>")])
745
;; Two-operand SSE packed divide (divps / divpd) for V4SF / V2DF.
;; Operand 1 is tied to the destination; operand 2 may be in memory.
746 (define_insn "<sse>_div<mode>3"
747 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
748 (div:SSEMODEF2P
749 (match_operand:SSEMODEF2P 1 "register_operand" "0")
750 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
751 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
752 "div<ssemodesuffix>\t{%2, %0|%0, %2}"
753 [(set_attr "type" "ssediv")
754 (set_attr "mode" "<MODE>")])
755
;; AVX scalar divide (vdivss / vdivsd) on the 16-byte float vector modes.
;; The vec_merge with mask 1 takes element 0 from the quotient and the
;; remaining elements from operand 1.
756 (define_insn "*avx_vmdiv<mode>3"
757 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
758 (vec_merge:SSEMODEF2P
759 (div:SSEMODEF2P
760 (match_operand:SSEMODEF2P 1 "register_operand" "x")
761 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
762 (match_dup 1)
763 (const_int 1)))]
764 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
765 "vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
766 [(set_attr "type" "ssediv")
767 (set_attr "prefix" "vex")
768 (set_attr "mode" "<ssescalarmode>")])
769
;; SSE scalar divide (divss / divsd) on V4SF / V2DF.  The vec_merge with
;; mask 1 takes element 0 from the quotient and the remaining elements
;; from operand 1, which is tied to the destination.
770 (define_insn "<sse>_vmdiv<mode>3"
771 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
772 (vec_merge:SSEMODEF2P
773 (div:SSEMODEF2P
774 (match_operand:SSEMODEF2P 1 "register_operand" "0")
775 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
776 (match_dup 1)
777 (const_int 1)))]
778 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
779 "div<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
780 [(set_attr "type" "ssediv")
781 (set_attr "mode" "<ssescalarmode>")])
782
;; AVX vrcpps on V8SF, modelled as UNSPEC_RCP.  The source may be a
;; register or memory.
783 (define_insn "avx_rcpv8sf2"
784 [(set (match_operand:V8SF 0 "register_operand" "=x")
785 (unspec:V8SF
786 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
787 "TARGET_AVX"
788 "vrcpps\t{%1, %0|%0, %1}"
789 [(set_attr "type" "sse")
790 (set_attr "prefix" "vex")
791 (set_attr "mode" "V8SF")])
792
;; rcpps on V4SF (vrcpps when the "%v" prefix expands), modelled as
;; UNSPEC_RCP.  The source may be a register or memory.
793 (define_insn "sse_rcpv4sf2"
794 [(set (match_operand:V4SF 0 "register_operand" "=x")
795 (unspec:V4SF
796 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
797 "TARGET_SSE"
798 "%vrcpps\t{%1, %0|%0, %1}"
799 [(set_attr "type" "sse")
800 (set_attr "atom_sse_attr" "rcp")
801 (set_attr "prefix" "maybe_vex")
802 (set_attr "mode" "V4SF")])
803
;; AVX vrcpss: the vec_merge with mask 1 takes element 0 from the
;; UNSPEC_RCP of operand 1 and the remaining elements from operand 2.
804 (define_insn "*avx_vmrcpv4sf2"
805 [(set (match_operand:V4SF 0 "register_operand" "=x")
806 (vec_merge:V4SF
807 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
808 UNSPEC_RCP)
809 (match_operand:V4SF 2 "register_operand" "x")
810 (const_int 1)))]
811 "TARGET_AVX"
812 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
813 [(set_attr "type" "sse")
814 (set_attr "prefix" "vex")
815 (set_attr "mode" "SF")])
816
;; SSE rcpss: the vec_merge with mask 1 takes element 0 from the
;; UNSPEC_RCP of operand 1 and the remaining elements from operand 2,
;; which is tied to the destination.
817 (define_insn "sse_vmrcpv4sf2"
818 [(set (match_operand:V4SF 0 "register_operand" "=x")
819 (vec_merge:V4SF
820 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
821 UNSPEC_RCP)
822 (match_operand:V4SF 2 "register_operand" "0")
823 (const_int 1)))]
824 "TARGET_SSE"
825 "rcpss\t{%1, %0|%0, %1}"
826 [(set_attr "type" "sse")
827 (set_attr "atom_sse_attr" "rcp")
828 (set_attr "mode" "SF")])
829
;; V8SF square-root expander (TARGET_AVX).  When SSE math and TARGET_RECIP
;; are enabled, the insn is not being optimized for size, and the
;; finite-math-only, no-trapping-math and unsafe-math flags all hold, the
;; result is produced by ix86_emit_swsqrtsf (last argument 0); otherwise
;; the sqrt template is used.
(define_expand "sqrtv8sf2"
  [(set (match_operand:V8SF 0 "register_operand" "")
        (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
  "TARGET_AVX"
{
  const bool use_swsqrt = (TARGET_SSE_MATH
                           && TARGET_RECIP
                           && !optimize_insn_for_size_p ()
                           && flag_finite_math_only
                           && !flag_trapping_math
                           && flag_unsafe_math_optimizations);

  if (use_swsqrt)
    {
      ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
      DONE;
    }
})
843
;; AVX vsqrtps on V8SF.  The source may be a register or memory.
844 (define_insn "avx_sqrtv8sf2"
845 [(set (match_operand:V8SF 0 "register_operand" "=x")
846 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
847 "TARGET_AVX"
848 "vsqrtps\t{%1, %0|%0, %1}"
849 [(set_attr "type" "sse")
850 (set_attr "prefix" "vex")
851 (set_attr "mode" "V8SF")])
852
;; V4SF square-root expander (TARGET_SSE).  When SSE math and TARGET_RECIP
;; are enabled, the insn is being optimized for speed, and the
;; finite-math-only, no-trapping-math and unsafe-math flags all hold, the
;; result is produced by ix86_emit_swsqrtsf (last argument 0); otherwise
;; the sqrt template is used.
(define_expand "sqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  const bool use_swsqrt = (TARGET_SSE_MATH
                           && TARGET_RECIP
                           && optimize_insn_for_speed_p ()
                           && flag_finite_math_only
                           && !flag_trapping_math
                           && flag_unsafe_math_optimizations);

  if (use_swsqrt)
    {
      ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
      DONE;
    }
})
866
;; sqrtps on V4SF (vsqrtps when the "%v" prefix expands).  The source may
;; be a register or memory.
867 (define_insn "sse_sqrtv4sf2"
868 [(set (match_operand:V4SF 0 "register_operand" "=x")
869 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
870 "TARGET_SSE"
871 "%vsqrtps\t{%1, %0|%0, %1}"
872 [(set_attr "type" "sse")
873 (set_attr "atom_sse_attr" "sqrt")
874 (set_attr "prefix" "maybe_vex")
875 (set_attr "mode" "V4SF")])
876
;; AVX vsqrtpd on V4DF.  Named without a prefix, so this insn also serves
;; as the sqrtv4df2 named pattern directly.
877 (define_insn "sqrtv4df2"
878 [(set (match_operand:V4DF 0 "register_operand" "=x")
879 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
880 "TARGET_AVX"
881 "vsqrtpd\t{%1, %0|%0, %1}"
882 [(set_attr "type" "sse")
883 (set_attr "prefix" "vex")
884 (set_attr "mode" "V4DF")])
885
;; 128-bit packed double-precision square root; requires SSE2.  %v plus
;; prefix maybe_vex lets the same pattern cover the VEX-encoded form.
886 (define_insn "sqrtv2df2"
887 [(set (match_operand:V2DF 0 "register_operand" "=x")
888 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
889 "TARGET_SSE2"
890 "%vsqrtpd\t{%1, %0|%0, %1}"
891 [(set_attr "type" "sse")
892 (set_attr "prefix" "maybe_vex")
893 (set_attr "mode" "V2DF")])
894
;; AVX scalar square root (three-operand form).
;; Element 0 of the result is sqrt of operand 1; the remaining elements are
;; taken from operand 2 (vec_merge mask 1).  Operand 2 is an independent
;; register here, not tied to the destination.
895 (define_insn "*avx_vmsqrt<mode>2"
896 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
897 (vec_merge:SSEMODEF2P
898 (sqrt:SSEMODEF2P
899 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
900 (match_operand:SSEMODEF2P 2 "register_operand" "x")
901 (const_int 1)))]
902 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
903 "vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
904 [(set_attr "type" "sse")
905 (set_attr "prefix" "vex")
906 (set_attr "mode" "<ssescalarmode>")])
907
;; Legacy-SSE scalar square root (two-operand form).
;; Same RTL shape as the AVX variant, but operand 2 (the source of the
;; untouched upper elements) is tied to the destination with "0".
908 (define_insn "<sse>_vmsqrt<mode>2"
909 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
910 (vec_merge:SSEMODEF2P
911 (sqrt:SSEMODEF2P
912 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
913 (match_operand:SSEMODEF2P 2 "register_operand" "0")
914 (const_int 1)))]
915 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
916 "sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
917 [(set_attr "type" "sse")
918 (set_attr "atom_sse_attr" "sqrt")
919 (set_attr "mode" "<ssescalarmode>")])
920
;; Expander for V8SF reciprocal square root.  It always delegates to
;; ix86_emit_swsqrtsf with last argument 1 (the sqrt expanders pass 0) and
;; finishes with DONE, so the RTL template itself is never emitted.
921 (define_expand "rsqrtv8sf2"
922 [(set (match_operand:V8SF 0 "register_operand" "")
923 (unspec:V8SF
924 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
925 "TARGET_AVX && TARGET_SSE_MATH"
926 {
927 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
928 DONE;
929 })
930
;; 256-bit packed reciprocal square root (vrsqrtps), modelled as UNSPEC_RSQRT.
931 (define_insn "avx_rsqrtv8sf2"
932 [(set (match_operand:V8SF 0 "register_operand" "=x")
933 (unspec:V8SF
934 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
935 "TARGET_AVX"
936 "vrsqrtps\t{%1, %0|%0, %1}"
937 [(set_attr "type" "sse")
938 (set_attr "prefix" "vex")
939 (set_attr "mode" "V8SF")])
940
;; Expander for V4SF reciprocal square root.  It always delegates to
;; ix86_emit_swsqrtsf with last argument 1 and finishes with DONE.
941 (define_expand "rsqrtv4sf2"
942 [(set (match_operand:V4SF 0 "register_operand" "")
943 (unspec:V4SF
944 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
945 "TARGET_SSE_MATH"
946 {
947 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
948 DONE;
949 })
950
;; 128-bit packed reciprocal square root, modelled as UNSPEC_RSQRT.
;; %v plus prefix maybe_vex covers both the legacy and the VEX encoding.
951 (define_insn "sse_rsqrtv4sf2"
952 [(set (match_operand:V4SF 0 "register_operand" "=x")
953 (unspec:V4SF
954 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
955 "TARGET_SSE"
956 "%vrsqrtps\t{%1, %0|%0, %1}"
957 [(set_attr "type" "sse")
958 (set_attr "prefix" "maybe_vex")
959 (set_attr "mode" "V4SF")])
960
;; AVX scalar reciprocal square root (vrsqrtss, three-operand form).
;; Element 0 is UNSPEC_RSQRT of operand 1; the other elements come from
;; operand 2, which is a separate register input.
961 (define_insn "*avx_vmrsqrtv4sf2"
962 [(set (match_operand:V4SF 0 "register_operand" "=x")
963 (vec_merge:V4SF
964 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
965 UNSPEC_RSQRT)
966 (match_operand:V4SF 2 "register_operand" "x")
967 (const_int 1)))]
968 "TARGET_AVX"
969 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
970 [(set_attr "type" "sse")
971 (set_attr "prefix" "vex")
972 (set_attr "mode" "SF")])
973
;; Legacy-SSE scalar reciprocal square root (rsqrtss).
;; Element 0 is UNSPEC_RSQRT of operand 1; operand 2 supplies the remaining
;; elements and is tied to the destination ("0").
974 (define_insn "sse_vmrsqrtv4sf2"
975 [(set (match_operand:V4SF 0 "register_operand" "=x")
976 (vec_merge:V4SF
977 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
978 UNSPEC_RSQRT)
979 (match_operand:V4SF 2 "register_operand" "0")
980 (const_int 1)))]
981 "TARGET_SSE"
982 "rsqrtss\t{%1, %0|%0, %1}"
983 [(set_attr "type" "sse")
984 (set_attr "mode" "SF")])
985
986 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
987 ;; isn't really correct, as those rtl operators aren't defined when
988 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
989
;; Expander for 256-bit floating-point smax/smin.
;; Without -ffinite-math-only, operand 1 is forced into a register first so
;; that the non-commutative insn patterns can match; then the operands are
;; passed through ix86_fixup_binary_operands_no_copy.
;; NOTE(review): that helper is defined elsewhere; presumably it legitimizes
;; the operand combination for a two-input instruction -- confirm.
990 (define_expand "<code><mode>3"
991 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
992 (smaxmin:AVX256MODEF2P
993 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
994 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
995 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
996 {
997 if (!flag_finite_math_only)
998 operands[1] = force_reg (<MODE>mode, operands[1]);
999 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
1000 })
1001
;; Expander for 128-bit floating-point smax/smin.
;; Same logic as the 256-bit expander: operand 1 is forced into a register
;; unless -ffinite-math-only is in effect, then the operands are passed
;; through ix86_fixup_binary_operands_no_copy.
1002 (define_expand "<code><mode>3"
1003 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1004 (smaxmin:SSEMODEF2P
1005 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1006 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1007 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1008 {
1009 if (!flag_finite_math_only)
1010 operands[1] = force_reg (<MODE>mode, operands[1]);
1011 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
1012 })
1013
;; AVX packed min/max, used only under flag_finite_math_only.
;; Because NaNs are excluded, the operands are treated as commutative: the
;; "%" modifier on operand 1 lets the two inputs be swapped.
1014 (define_insn "*avx_<code><mode>3_finite"
1015 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1016 (smaxmin:AVXMODEF2P
1017 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1018 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1019 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1020 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1021 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1022 [(set_attr "type" "sseadd")
1023 (set_attr "prefix" "vex")
1024 (set_attr "mode" "<MODE>")])
1025
;; Legacy-SSE packed min/max, used only under flag_finite_math_only.
;; Operand 1 is commutative ("%") and tied to the destination ("0").
1026 (define_insn "*<code><mode>3_finite"
1027 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1028 (smaxmin:SSEMODEF2P
1029 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1030 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1031 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1032 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1033 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1034 [(set_attr "type" "sseadd")
1035 (set_attr "mode" "<MODE>")])
1036
;; AVX packed min/max for the general (non finite-math-only) case.
;; The hardware min/max instructions are not commutative when NaNs or signed
;; zeros are involved, so operand 1 must NOT carry the "%" commutative
;; modifier and must be a register, exactly like the legacy-SSE sibling
;; pattern.  The expander already forces operand 1 into a register when
;; !flag_finite_math_only; the commutative case is handled by the separate
;; *_finite pattern.
1037 (define_insn "*avx_<code><mode>3"
1038 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1039 (smaxmin:AVXMODEF2P
1040 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1041 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1042 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1043 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1044 [(set_attr "type" "sseadd")
1045 (set_attr "prefix" "vex")
1046 (set_attr "mode" "<avxvecmode>")])
1047
;; Legacy-SSE packed min/max for the general (non finite-math-only) case.
;; Operand 1 is a register tied to the destination and is not marked
;; commutative, so operand order is preserved.
1048 (define_insn "*<code><mode>3"
1049 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1050 (smaxmin:SSEMODEF2P
1051 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1052 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1053 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1054 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1055 [(set_attr "type" "sseadd")
1056 (set_attr "mode" "<MODE>")])
1057
;; AVX scalar min/max.  Element 0 is smax/smin of operands 1 and 2; the other
;; elements are copied from operand 1 (match_dup 1 under vec_merge mask 1).
;; NOTE(review): "type" is "sse" here while the legacy-SSE sibling uses
;; "sseadd" -- confirm whether the difference is intentional.
1058 (define_insn "*avx_vm<code><mode>3"
1059 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1060 (vec_merge:SSEMODEF2P
1061 (smaxmin:SSEMODEF2P
1062 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1063 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1064 (match_dup 1)
1065 (const_int 1)))]
1066 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1067 "v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1068 [(set_attr "type" "sse")
1069 (set_attr "prefix" "vex")
1070 (set_attr "mode" "<ssescalarmode>")])
1071
;; Legacy-SSE scalar min/max.  Element 0 is smax/smin of operands 1 and 2;
;; the other elements are kept from operand 1, which is tied to the
;; destination ("0").
1072 (define_insn "<sse>_vm<code><mode>3"
1073 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1074 (vec_merge:SSEMODEF2P
1075 (smaxmin:SSEMODEF2P
1076 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1077 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1078 (match_dup 1)
1079 (const_int 1)))]
1080 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1081 "<maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1082 [(set_attr "type" "sseadd")
1083 (set_attr "mode" "<ssescalarmode>")])
1084
1085 ;; These versions of the min/max patterns implement exactly the operations
1086 ;; min = (op1 < op2 ? op1 : op2)
1087 ;; max = (!(op1 < op2) ? op1 : op2)
1088 ;; Their operands are not commutative, and thus they may be used in the
1089 ;; presence of -0.0 and NaN.
1090
;; AVX packed min with exact instruction semantics, expressed as
;; UNSPEC_IEEE_MIN so that the operand order is never altered.
1091 (define_insn "*avx_ieee_smin<mode>3"
1092 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1093 (unspec:AVXMODEF2P
1094 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1095 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1096 UNSPEC_IEEE_MIN))]
1097 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1098 "vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1099 [(set_attr "type" "sseadd")
1100 (set_attr "prefix" "vex")
1101 (set_attr "mode" "<avxvecmode>")])
1102
;; AVX packed max with exact instruction semantics, expressed as
;; UNSPEC_IEEE_MAX so that the operand order is never altered.
1103 (define_insn "*avx_ieee_smax<mode>3"
1104 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1105 (unspec:AVXMODEF2P
1106 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1107 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1108 UNSPEC_IEEE_MAX))]
1109 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1110 "vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1111 [(set_attr "type" "sseadd")
1112 (set_attr "prefix" "vex")
1113 (set_attr "mode" "<avxvecmode>")])
1114
;; Legacy-SSE packed min with exact instruction semantics (UNSPEC_IEEE_MIN).
;; Operand 1 is tied to the destination.
1115 (define_insn "*ieee_smin<mode>3"
1116 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1117 (unspec:SSEMODEF2P
1118 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1119 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1120 UNSPEC_IEEE_MIN))]
1121 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1122 "min<ssemodesuffix>\t{%2, %0|%0, %2}"
1123 [(set_attr "type" "sseadd")
1124 (set_attr "mode" "<MODE>")])
1125
;; Legacy-SSE packed max with exact instruction semantics (UNSPEC_IEEE_MAX).
;; Operand 1 is tied to the destination.
1126 (define_insn "*ieee_smax<mode>3"
1127 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1128 (unspec:SSEMODEF2P
1129 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1130 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1131 UNSPEC_IEEE_MAX))]
1132 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1133 "max<ssemodesuffix>\t{%2, %0|%0, %2}"
1134 [(set_attr "type" "sseadd")
1135 (set_attr "mode" "<MODE>")])
1136
;; 256-bit vaddsubps.  The merge mask 170 is 0b10101010: odd-numbered
;; elements take op1 + op2, even-numbered elements take op1 - op2.
1137 (define_insn "avx_addsubv8sf3"
1138 [(set (match_operand:V8SF 0 "register_operand" "=x")
1139 (vec_merge:V8SF
1140 (plus:V8SF
1141 (match_operand:V8SF 1 "register_operand" "x")
1142 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1143 (minus:V8SF (match_dup 1) (match_dup 2))
1144 (const_int 170)))]
1145 "TARGET_AVX"
1146 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1147 [(set_attr "type" "sseadd")
1148 (set_attr "prefix" "vex")
1149 (set_attr "mode" "V8SF")])
1150
;; 256-bit vaddsubpd.  The merge mask 10 is 0b1010: elements 1 and 3 take
;; op1 + op2, elements 0 and 2 take op1 - op2.
1151 (define_insn "avx_addsubv4df3"
1152 [(set (match_operand:V4DF 0 "register_operand" "=x")
1153 (vec_merge:V4DF
1154 (plus:V4DF
1155 (match_operand:V4DF 1 "register_operand" "x")
1156 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1157 (minus:V4DF (match_dup 1) (match_dup 2))
1158 (const_int 10)))]
1159 "TARGET_AVX"
1160 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1161 [(set_attr "type" "sseadd")
1162 (set_attr "prefix" "vex")
1163 (set_attr "mode" "V4DF")])
1164
;; 128-bit VEX-encoded vaddsubps.  The merge mask 10 is 0b1010: elements 1
;; and 3 take the sum, elements 0 and 2 the difference.
1165 (define_insn "*avx_addsubv4sf3"
1166 [(set (match_operand:V4SF 0 "register_operand" "=x")
1167 (vec_merge:V4SF
1168 (plus:V4SF
1169 (match_operand:V4SF 1 "register_operand" "x")
1170 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1171 (minus:V4SF (match_dup 1) (match_dup 2))
1172 (const_int 10)))]
1173 "TARGET_AVX"
1174 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1175 [(set_attr "type" "sseadd")
1176 (set_attr "prefix" "vex")
1177 (set_attr "mode" "V4SF")])
1178
;; SSE3 addsubps (two-operand form, operand 1 tied to the destination).
;; The merge mask 10 is 0b1010: elements 1 and 3 take the sum, elements 0
;; and 2 the difference.
1179 (define_insn "sse3_addsubv4sf3"
1180 [(set (match_operand:V4SF 0 "register_operand" "=x")
1181 (vec_merge:V4SF
1182 (plus:V4SF
1183 (match_operand:V4SF 1 "register_operand" "0")
1184 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1185 (minus:V4SF (match_dup 1) (match_dup 2))
1186 (const_int 10)))]
1187 "TARGET_SSE3"
1188 "addsubps\t{%2, %0|%0, %2}"
1189 [(set_attr "type" "sseadd")
1190 (set_attr "prefix_rep" "1")
1191 (set_attr "mode" "V4SF")])
1192
;; 128-bit VEX-encoded vaddsubpd.  The merge mask 2 is 0b10: element 1 takes
;; the sum, element 0 the difference.
1193 (define_insn "*avx_addsubv2df3"
1194 [(set (match_operand:V2DF 0 "register_operand" "=x")
1195 (vec_merge:V2DF
1196 (plus:V2DF
1197 (match_operand:V2DF 1 "register_operand" "x")
1198 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1199 (minus:V2DF (match_dup 1) (match_dup 2))
1200 (const_int 2)))]
1201 "TARGET_AVX"
1202 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1203 [(set_attr "type" "sseadd")
1204 (set_attr "prefix" "vex")
1205 (set_attr "mode" "V2DF")])
1206
;; SSE3 addsubpd (two-operand form, operand 1 tied to the destination).
;; The merge mask 2 is 0b10: element 1 takes the sum, element 0 the
;; difference.
1207 (define_insn "sse3_addsubv2df3"
1208 [(set (match_operand:V2DF 0 "register_operand" "=x")
1209 (vec_merge:V2DF
1210 (plus:V2DF
1211 (match_operand:V2DF 1 "register_operand" "0")
1212 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1213 (minus:V2DF (match_dup 1) (match_dup 2))
1214 (const_int 2)))]
1215 "TARGET_SSE3"
1216 "addsubpd\t{%2, %0|%0, %2}"
1217 [(set_attr "type" "sseadd")
1218 (set_attr "atom_unit" "complex")
1219 (set_attr "mode" "V2DF")])
1220
;; 256-bit horizontal add/subtract of doubles (vhaddpd / vhsubpd).
;; The 256-bit instruction operates independently on each 128-bit lane, so
;; with a = operand 1 and b = operand 2 the result is
;;   { a[0] op a[1], b[0] op b[1], a[2] op a[3], b[2] op b[3] }.
;; The RTL must interleave the operands per lane (as the V8SF pattern does);
;; describing it as { a01, a23, b01, b23 } would misstate the instruction
;; to any RTL pass that looks inside the pattern.
1221 (define_insn "avx_h<plusminus_insn>v4df3"
1222 [(set (match_operand:V4DF 0 "register_operand" "=x")
1223 (vec_concat:V4DF
1224 (vec_concat:V2DF
1225 (plusminus:DF
1226 (vec_select:DF
1227 (match_operand:V4DF 1 "register_operand" "x")
1228 (parallel [(const_int 0)]))
1229 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1230 (plusminus:DF
1231 (vec_select:DF
1232 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1233 (parallel [(const_int 0)]))
1234 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1235 (vec_concat:V2DF
1236 (plusminus:DF
1237 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1238 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1239 (plusminus:DF
1240 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1241 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1242 "TARGET_AVX"
1243 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1244 [(set_attr "type" "sseadd")
1245 (set_attr "prefix" "vex")
1246 (set_attr "mode" "V4DF")])
1247
;; 256-bit horizontal add/subtract of floats (vhaddps / vhsubps).
;; With a = operand 1 and b = operand 2 the RTL builds, per 128-bit half:
;;   low  half: { a[0] op a[1], a[2] op a[3], b[0] op b[1], b[2] op b[3] }
;;   high half: { a[4] op a[5], a[6] op a[7], b[4] op b[5], b[6] op b[7] }
1248 (define_insn "avx_h<plusminus_insn>v8sf3"
1249 [(set (match_operand:V8SF 0 "register_operand" "=x")
1250 (vec_concat:V8SF
1251 (vec_concat:V4SF
1252 (vec_concat:V2SF
1253 (plusminus:SF
1254 (vec_select:SF
1255 (match_operand:V8SF 1 "register_operand" "x")
1256 (parallel [(const_int 0)]))
1257 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1258 (plusminus:SF
1259 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1260 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1261 (vec_concat:V2SF
1262 (plusminus:SF
1263 (vec_select:SF
1264 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1265 (parallel [(const_int 0)]))
1266 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1267 (plusminus:SF
1268 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1269 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1270 (vec_concat:V4SF
1271 (vec_concat:V2SF
1272 (plusminus:SF
1273 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1274 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1275 (plusminus:SF
1276 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1277 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1278 (vec_concat:V2SF
1279 (plusminus:SF
1280 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1281 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1282 (plusminus:SF
1283 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1284 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1285 "TARGET_AVX"
1286 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1287 [(set_attr "type" "sseadd")
1288 (set_attr "prefix" "vex")
1289 (set_attr "mode" "V8SF")])
1290
;; 128-bit VEX-encoded horizontal add/subtract of floats.
;; Result: { a[0] op a[1], a[2] op a[3], b[0] op b[1], b[2] op b[3] }
;; with a = operand 1 and b = operand 2.
1291 (define_insn "*avx_h<plusminus_insn>v4sf3"
1292 [(set (match_operand:V4SF 0 "register_operand" "=x")
1293 (vec_concat:V4SF
1294 (vec_concat:V2SF
1295 (plusminus:SF
1296 (vec_select:SF
1297 (match_operand:V4SF 1 "register_operand" "x")
1298 (parallel [(const_int 0)]))
1299 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1300 (plusminus:SF
1301 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1302 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1303 (vec_concat:V2SF
1304 (plusminus:SF
1305 (vec_select:SF
1306 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1307 (parallel [(const_int 0)]))
1308 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1309 (plusminus:SF
1310 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1311 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1312 "TARGET_AVX"
1313 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1314 [(set_attr "type" "sseadd")
1315 (set_attr "prefix" "vex")
1316 (set_attr "mode" "V4SF")])
1317
;; SSE3 horizontal add/subtract of floats (two-operand form; operand 1 is
;; tied to the destination).
;; Result: { a[0] op a[1], a[2] op a[3], b[0] op b[1], b[2] op b[3] }
;; with a = operand 1 and b = operand 2.
1318 (define_insn "sse3_h<plusminus_insn>v4sf3"
1319 [(set (match_operand:V4SF 0 "register_operand" "=x")
1320 (vec_concat:V4SF
1321 (vec_concat:V2SF
1322 (plusminus:SF
1323 (vec_select:SF
1324 (match_operand:V4SF 1 "register_operand" "0")
1325 (parallel [(const_int 0)]))
1326 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1327 (plusminus:SF
1328 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1329 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1330 (vec_concat:V2SF
1331 (plusminus:SF
1332 (vec_select:SF
1333 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1334 (parallel [(const_int 0)]))
1335 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1336 (plusminus:SF
1337 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1338 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1339 "TARGET_SSE3"
1340 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1341 [(set_attr "type" "sseadd")
1342 (set_attr "atom_unit" "complex")
1343 (set_attr "prefix_rep" "1")
1344 (set_attr "mode" "V4SF")])
1345
;; 128-bit VEX-encoded horizontal add/subtract of doubles.
;; Result: { a[0] op a[1], b[0] op b[1] } with a = operand 1, b = operand 2.
1346 (define_insn "*avx_h<plusminus_insn>v2df3"
1347 [(set (match_operand:V2DF 0 "register_operand" "=x")
1348 (vec_concat:V2DF
1349 (plusminus:DF
1350 (vec_select:DF
1351 (match_operand:V2DF 1 "register_operand" "x")
1352 (parallel [(const_int 0)]))
1353 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1354 (plusminus:DF
1355 (vec_select:DF
1356 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1357 (parallel [(const_int 0)]))
1358 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1359 "TARGET_AVX"
1360 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1361 [(set_attr "type" "sseadd")
1362 (set_attr "prefix" "vex")
1363 (set_attr "mode" "V2DF")])
1364
;; SSE3 horizontal add/subtract of doubles (two-operand form; operand 1 is
;; tied to the destination).
;; Result: { a[0] op a[1], b[0] op b[1] } with a = operand 1, b = operand 2.
1365 (define_insn "sse3_h<plusminus_insn>v2df3"
1366 [(set (match_operand:V2DF 0 "register_operand" "=x")
1367 (vec_concat:V2DF
1368 (plusminus:DF
1369 (vec_select:DF
1370 (match_operand:V2DF 1 "register_operand" "0")
1371 (parallel [(const_int 0)]))
1372 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1373 (plusminus:DF
1374 (vec_select:DF
1375 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1376 (parallel [(const_int 0)]))
1377 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1378 "TARGET_SSE3"
1379 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1380 [(set_attr "type" "sseadd")
1381 (set_attr "mode" "V2DF")])
1382
;; Sum-reduction of a V4SF vector.
;; With SSE3, two back-to-back haddps insns are emitted: the first into a
;; fresh temporary, the second from that temporary into operand 0.
;; Without SSE3 the work is delegated to ix86_expand_reduc_v4sf using the
;; vector add generator.
;; NOTE(review): ix86_expand_reduc_v4sf is defined elsewhere -- its exact
;; shuffle sequence is not visible here.
1383 (define_expand "reduc_splus_v4sf"
1384 [(match_operand:V4SF 0 "register_operand" "")
1385 (match_operand:V4SF 1 "register_operand" "")]
1386 "TARGET_SSE"
1387 {
1388 if (TARGET_SSE3)
1389 {
1390 rtx tmp = gen_reg_rtx (V4SFmode);
1391 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1392 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1393 }
1394 else
1395 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
1396 DONE;
1397 })
1398
;; Sum-reduction of a V2DF vector: a single haddpd with operand 1 used as
;; both inputs.  Only available with SSE3.
1399 (define_expand "reduc_splus_v2df"
1400 [(match_operand:V2DF 0 "register_operand" "")
1401 (match_operand:V2DF 1 "register_operand" "")]
1402 "TARGET_SSE3"
1403 {
1404 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1405 DONE;
1406 })
1407
;; Max-reduction of a V4SF vector, delegated to ix86_expand_reduc_v4sf with
;; the smaxv4sf3 generator as the combining operation.
1408 (define_expand "reduc_smax_v4sf"
1409 [(match_operand:V4SF 0 "register_operand" "")
1410 (match_operand:V4SF 1 "register_operand" "")]
1411 "TARGET_SSE"
1412 {
1413 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
1414 DONE;
1415 })
1416
;; Min-reduction of a V4SF vector, delegated to ix86_expand_reduc_v4sf with
;; the sminv4sf3 generator as the combining operation.
1417 (define_expand "reduc_smin_v4sf"
1418 [(match_operand:V4SF 0 "register_operand" "")
1419 (match_operand:V4SF 1 "register_operand" "")]
1420 "TARGET_SSE"
1421 {
1422 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1423 DONE;
1424 })
1425
1426 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1427 ;;
1428 ;; Parallel floating point comparisons
1429 ;;
1430 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1431
;; AVX packed compare with an explicit predicate immediate.
;; Operand 3 is a constant in the range 0..31 that is printed as the last
;; (AT&T: first) operand of vcmpps/vcmppd; the whole operation is modelled
;; as UNSPEC_PCMP.
1432 (define_insn "avx_cmp<ssemodesuffix><mode>3"
1433 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1434 (unspec:AVXMODEF2P
1435 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1436 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1437 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1438 UNSPEC_PCMP))]
1439 "TARGET_AVX"
1440 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1441 [(set_attr "type" "ssecmp")
1442 (set_attr "length_immediate" "1")
1443 (set_attr "prefix" "vex")
1444 (set_attr "mode" "<MODE>")])
1445
;; AVX scalar compare with an explicit predicate immediate (vcmpss/vcmpsd).
;; Element 0 is the UNSPEC_PCMP result of operands 1 and 2 under predicate
;; operand 3 (constant 0..31); the remaining elements are copied from
;; operand 1 (match_dup 1 under vec_merge mask 1).
;; The destination needs an explicit "=x" constraint: an empty constraint
;; string on the output of a define_insn gives register allocation no
;; register class and does not mark the operand as written.
1446 (define_insn "avx_cmp<ssescalarmodesuffix><mode>3"
1447 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1448 (vec_merge:SSEMODEF2P
1449 (unspec:SSEMODEF2P
1450 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1451 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1452 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1453 UNSPEC_PCMP)
1454 (match_dup 1)
1455 (const_int 1)))]
1456 "TARGET_AVX"
1457 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1458 [(set_attr "type" "ssecmp")
1459 (set_attr "length_immediate" "1")
1460 (set_attr "prefix" "vex")
1461 (set_attr "mode" "<ssescalarmode>")])
1462
1463 ;; We don't promote 128bit vector compare intrinsics. But the vectorizer
1464 ;; may generate 256bit vector compare instructions.
;; AVX packed mask compare.  The comparison is an RTL operator (operand 3)
;; accepted by avx_comparison_float_operator; %D3 prints the matching
;; condition name as part of the mnemonic.
1465 (define_insn "*avx_maskcmp<mode>3"
1466 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1467 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1468 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1469 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1470 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1471 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1472 [(set_attr "type" "ssecmp")
1473 (set_attr "prefix" "vex")
1474 (set_attr "length_immediate" "1")
1475 (set_attr "mode" "<avxvecmode>")])
1476
;; Legacy-SSE mask compare for scalar and vector float modes (SSEMODEF4).
;; Disabled when TARGET_XOP is set.  Operand 1 is tied to the destination;
;; %D3 prints the condition name of the comparison operator.
1477 (define_insn "<sse>_maskcmp<mode>3"
1478 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1479 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1480 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1481 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1482 "!TARGET_XOP
1483 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1484 "cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}"
1485 [(set_attr "type" "ssecmp")
1486 (set_attr "length_immediate" "1")
1487 (set_attr "mode" "<MODE>")])
1488
;; AVX scalar mask compare.  Element 0 is the result of comparison operator 3
;; applied to operands 1 and 2; the remaining elements are copied from
;; operand 1 (match_dup 1 under vec_merge mask 1).
;; The condition is folded into the mnemonic via %D3, but the encoded
;; instruction still carries a one-byte predicate immediate, so
;; length_immediate is set to 1 like in the other maskcmp patterns.
1489 (define_insn "*avx_vmmaskcmp<mode>3"
1490 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1491 (vec_merge:SSEMODEF2P
1492 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1493 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1494 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1495 (match_dup 1)
1496 (const_int 1)))]
1497 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1498 "vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1499 [(set_attr "type" "ssecmp")
1500 (set_attr "prefix" "vex")
(set_attr "length_immediate" "1")
1501 (set_attr "mode" "<ssescalarmode>")])
1502
;; Legacy-SSE scalar mask compare.  Element 0 is the result of comparison
;; operator 3; the remaining elements are kept from operand 1, which is tied
;; to the destination ("0").
1503 (define_insn "<sse>_vmmaskcmp<mode>3"
1504 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1505 (vec_merge:SSEMODEF2P
1506 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1507 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1508 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1509 (match_dup 1)
1510 (const_int 1)))]
1511 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1512 "cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1513 [(set_attr "type" "ssecmp")
1514 (set_attr "length_immediate" "1")
1515 (set_attr "mode" "<ssescalarmode>")])
1516
;; Scalar ordered compare (comiss/comisd) setting FLAGS_REG in CCFP mode.
;; Only element 0 of each vector operand takes part in the comparison.
;; prefix_data16 is 1 for DF mode and 0 otherwise; prefix_rep is forced to 0.
1517 (define_insn "<sse>_comi"
1518 [(set (reg:CCFP FLAGS_REG)
1519 (compare:CCFP
1520 (vec_select:MODEF
1521 (match_operand:<ssevecmode> 0 "register_operand" "x")
1522 (parallel [(const_int 0)]))
1523 (vec_select:MODEF
1524 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1525 (parallel [(const_int 0)]))))]
1526 "SSE_FLOAT_MODE_P (<MODE>mode)"
1527 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1528 [(set_attr "type" "ssecomi")
1529 (set_attr "prefix" "maybe_vex")
1530 (set_attr "prefix_rep" "0")
1531 (set (attr "prefix_data16")
1532 (if_then_else (eq_attr "mode" "DF")
1533 (const_string "1")
1534 (const_string "0")))
1535 (set_attr "mode" "<MODE>")])
1536
;; Scalar unordered compare (ucomiss/ucomisd) setting FLAGS_REG in CCFPU
;; mode.  Only element 0 of each vector operand takes part in the comparison.
;; prefix_data16 is 1 for DF mode and 0 otherwise; prefix_rep is forced to 0.
1537 (define_insn "<sse>_ucomi"
1538 [(set (reg:CCFPU FLAGS_REG)
1539 (compare:CCFPU
1540 (vec_select:MODEF
1541 (match_operand:<ssevecmode> 0 "register_operand" "x")
1542 (parallel [(const_int 0)]))
1543 (vec_select:MODEF
1544 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1545 (parallel [(const_int 0)]))))]
1546 "SSE_FLOAT_MODE_P (<MODE>mode)"
1547 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1548 [(set_attr "type" "ssecomi")
1549 (set_attr "prefix" "maybe_vex")
1550 (set_attr "prefix_rep" "0")
1551 (set (attr "prefix_data16")
1552 (if_then_else (eq_attr "mode" "DF")
1553 (const_string "1")
1554 (const_string "0")))
1555 (set_attr "mode" "<MODE>")])
1556
;; Vector conditional select for floating-point vector modes:
;;   op0 = (op4 <cmp3> op5) ? op1 : op2.
;; All the work is done by ix86_expand_fp_vcond; the expander asserts that
;; the helper succeeded and never emits the RTL template itself.
1557 (define_expand "vcond<mode>"
1558 [(set (match_operand:AVXMODEF2P 0 "register_operand" "")
1559 (if_then_else:AVXMODEF2P
1560 (match_operator 3 ""
1561 [(match_operand:AVXMODEF2P 4 "nonimmediate_operand" "")
1562 (match_operand:AVXMODEF2P 5 "nonimmediate_operand" "")])
1563 (match_operand:AVXMODEF2P 1 "general_operand" "")
1564 (match_operand:AVXMODEF2P 2 "general_operand" "")))]
1565 "(SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1566 || AVX_VEC_FLOAT_MODE_P (<MODE>mode))"
1567 {
1568 bool ok = ix86_expand_fp_vcond (operands);
1569 gcc_assert (ok);
1570 DONE;
1571 })
1572
1573 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1574 ;;
1575 ;; Parallel floating point logical operations
1576 ;;
1577 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1578
;; AVX packed and-not: op0 = (~op1) & op2.  Note that the complemented input
;; is operand 1, which must be a register.
1579 (define_insn "avx_andnot<mode>3"
1580 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1581 (and:AVXMODEF2P
1582 (not:AVXMODEF2P
1583 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1584 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1585 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1586 "vandn<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1587 [(set_attr "type" "sselog")
1588 (set_attr "prefix" "vex")
1589 (set_attr "mode" "<avxvecmode>")])
1590
;; Legacy-SSE packed and-not: op0 = (~op1) & op2, with the complemented
;; operand 1 tied to the destination.
1591 (define_insn "<sse>_andnot<mode>3"
1592 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1593 (and:SSEMODEF2P
1594 (not:SSEMODEF2P
1595 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1596 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1597 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1598 "andn<ssemodesuffix>\t{%2, %0|%0, %2}"
1599 [(set_attr "type" "sselog")
1600 (set_attr "mode" "<MODE>")])
1601
;; Expander for 256-bit floating-point bitwise logic (any_logic code
;; iterator).  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
1602 (define_expand "<code><mode>3"
1603 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1604 (any_logic:AVX256MODEF2P
1605 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1606 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1607 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1608 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1609
;; AVX packed bitwise logic on float vectors; the inputs are commutative.
;; When TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set the "ps" form of the
;; mnemonic is emitted regardless of mode (a bitwise operation gives the
;; same bits either way); otherwise the mode-specific suffix is used.
1610 (define_insn "*avx_<code><mode>3"
1611 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1612 (any_logic:AVXMODEF2P
1613 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1614 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1615 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1616 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1617 {
1618 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1619 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1620 else
1621 return "v<logic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1622 }
1623 [(set_attr "type" "sselog")
1624 (set_attr "prefix" "vex")
1625 (set_attr "mode" "<avxvecmode>")])
1626
;; Expander for 128-bit floating-point bitwise logic (any_logic code
;; iterator).  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
1627 (define_expand "<code><mode>3"
1628 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1629 (any_logic:SSEMODEF2P
1630 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1631 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1632 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1633 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1634
;; Legacy-SSE packed bitwise logic on float vectors; operand 1 is
;; commutative and tied to the destination.  The "ps" mnemonic is used for
;; every mode when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.
1635 (define_insn "*<code><mode>3"
1636 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1637 (any_logic:SSEMODEF2P
1638 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1639 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1640 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1641 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1642 {
1643 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1644 return "<logic>ps\t{%2, %0|%0, %2}";
1645 else
1646 return "<logic><ssemodesuffix>\t{%2, %0|%0, %2}";
1647 }
1648 [(set_attr "type" "sselog")
1649 (set_attr "mode" "<MODE>")])
1650
;; Vector copysign, expanded into three logic insns using a mask in
;; operand 3:
;;   op4 = ~mask & op1
;;   op5 =  mask & op2
;;   op0 =  op4 | op5
;; Operands 4 and 5 are fresh pseudo registers.
;; NOTE(review): operand 3 comes from ix86_build_signbit_mask (defined
;; elsewhere); presumably a vector constant with only the sign bit set in
;; each element -- confirm.
1651 (define_expand "copysign<mode>3"
1652 [(set (match_dup 4)
1653 (and:SSEMODEF2P
1654 (not:SSEMODEF2P (match_dup 3))
1655 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
1656 (set (match_dup 5)
1657 (and:SSEMODEF2P (match_dup 3)
1658 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
1659 (set (match_operand:SSEMODEF2P 0 "register_operand" "")
1660 (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
1661 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1662 {
1663 operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
1664
1665 operands[4] = gen_reg_rtx (<MODE>mode);
1666 operands[5] = gen_reg_rtx (<MODE>mode);
1667 })
1668
1669 ;; Also define scalar versions. These are used for abs, neg, and
1670 ;; conditional move. Using subregs into vector modes causes register
1671 ;; allocation lossage. These patterns do not allow memory operands
1672 ;; because the native instructions read the full 128-bits.
1673
;; AVX and-not on scalar float modes: op0 = (~op1) & op2.
;; Both inputs must be registers; the packed vandnps/vandnpd instruction is
;; used, and the "mode" attribute is the corresponding vector mode.
1674 (define_insn "*avx_andnot<mode>3"
1675 [(set (match_operand:MODEF 0 "register_operand" "=x")
1676 (and:MODEF
1677 (not:MODEF
1678 (match_operand:MODEF 1 "register_operand" "x"))
1679 (match_operand:MODEF 2 "register_operand" "x")))]
1680 "AVX_FLOAT_MODE_P (<MODE>mode)"
1681 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1682 [(set_attr "type" "sselog")
1683 (set_attr "prefix" "vex")
1684 (set_attr "mode" "<ssevecmode>")])
1685
;; Legacy-SSE and-not on scalar float modes: op0 = (~op1) & op2.
;; Register operands only; operand 1 is tied to the destination.
1686 (define_insn "*andnot<mode>3"
1687 [(set (match_operand:MODEF 0 "register_operand" "=x")
1688 (and:MODEF
1689 (not:MODEF
1690 (match_operand:MODEF 1 "register_operand" "0"))
1691 (match_operand:MODEF 2 "register_operand" "x")))]
1692 "SSE_FLOAT_MODE_P (<MODE>mode)"
1693 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1694 [(set_attr "type" "sselog")
1695 (set_attr "mode" "<ssevecmode>")])
1696
;; AVX bitwise logic on scalar float modes, implemented with the packed
;; instruction.  Register operands only.  The "ps" mnemonic is used for both
;; SF and DF when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.
1697 (define_insn "*avx_<code><mode>3"
1698 [(set (match_operand:MODEF 0 "register_operand" "=x")
1699 (any_logic:MODEF
1700 (match_operand:MODEF 1 "register_operand" "x")
1701 (match_operand:MODEF 2 "register_operand" "x")))]
1702 "AVX_FLOAT_MODE_P (<MODE>mode)"
1703 {
1704 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1705 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1706 else
1707 return "v<logic>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}";
1708 }
1709 [(set_attr "type" "sselog")
1710 (set_attr "prefix" "vex")
1711 (set_attr "mode" "<ssevecmode>")])
1712
;; Legacy-SSE bitwise logic on scalar float modes, implemented with the
;; packed instruction.  Register operands only; operand 1 is tied to the
;; destination.  The "ps" mnemonic is used for both SF and DF when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.
1713 (define_insn "*<code><mode>3"
1714 [(set (match_operand:MODEF 0 "register_operand" "=x")
1715 (any_logic:MODEF
1716 (match_operand:MODEF 1 "register_operand" "0")
1717 (match_operand:MODEF 2 "register_operand" "x")))]
1718 "SSE_FLOAT_MODE_P (<MODE>mode)"
1719 {
1720 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1721 return "<logic>ps\t{%2, %0|%0, %2}";
1722 else
1723 return "<logic>p<ssemodefsuffix>\t{%2, %0|%0, %2}";
1724 }
1725 [(set_attr "type" "sselog")
1726 (set_attr "mode" "<ssevecmode>")])
1727
1728 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1729 ;;
1730 ;; FMA4 floating point multiply/accumulate instructions. This
1731 ;; includes the scalar version of the instructions as well as the
1732 ;; vector.
1733 ;;
1734 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1735
1736 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1737 ;; combine to generate a multiply/add with two memory references. We then
1738 ;; split this insn, into loading up the destination register with one of the
1739 ;; memory operations. If we don't manage to split the insn, reload will
1740 ;; generate the appropriate moves. The reason this is needed, is that combine
1741 ;; has already folded one of the memory references into both the multiply and
1742 ;; add insns, and it can't generate a new pseudo. I.e.:
1743 ;; (set (reg1) (mem (addr1)))
1744 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1745 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1746
;; FMA4 multiply-add over the FMA4MODEF4 modes (iterator defined outside this
;; chunk): operand 0 = (operand 1 * operand 2) + operand 3.
;; The predicates accept memory for every input, but the two constraint
;; alternatives allow a memory operand only for operand 3 (alt 0) or for
;; operand 2 (alt 1).  Gated on TARGET_FMA4 && TARGET_FUSED_MADD.
1747 (define_insn "fma4_fmadd<mode>4256"
1748 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1749 (plus:FMA4MODEF4
1750 (mult:FMA4MODEF4
1751 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1752 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1753 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1754 "TARGET_FMA4 && TARGET_FUSED_MADD"
1755 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1756 [(set_attr "type" "ssemuladd")
1757 (set_attr "mode" "<MODE>")])
1758
1759 ;; Floating multiply and subtract.
;; FMA4 multiply-subtract over the FMA4MODEF4 modes:
;;   operand 0 = (operand 1 * operand 2) - operand 3.
;; Memory is permitted for operand 3 (alt 0) or operand 2 (alt 1) only.
;; Gated on TARGET_FMA4 && TARGET_FUSED_MADD.
1760 (define_insn "fma4_fmsub<mode>4256"
1761 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1762 (minus:FMA4MODEF4
1763 (mult:FMA4MODEF4
1764 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1765 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1766 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1767 "TARGET_FMA4 && TARGET_FUSED_MADD"
1768 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1769 [(set_attr "type" "ssemuladd")
1770 (set_attr "mode" "<MODE>")])
1771
1772 ;; Floating point negative multiply and add.
1773 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; FMA4 negated multiply-add over the FMA4MODEF4 modes, expressed as
;;   operand 0 = operand 3 - (operand 1 * operand 2).
;; Note that operand 3 appears first in the RTL but last in the assembler
;; template.  Gated on TARGET_FMA4 && TARGET_FUSED_MADD.
1774 (define_insn "fma4_fnmadd<mode>4256"
1775 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1776 (minus:FMA4MODEF4
1777 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1778 (mult:FMA4MODEF4
1779 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1780 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))))]
1781 "TARGET_FMA4 && TARGET_FUSED_MADD"
1782 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1783 [(set_attr "type" "ssemuladd")
1784 (set_attr "mode" "<MODE>")])
1785
1786 ;; Floating point negative multiply and subtract.
;; FMA4 negated multiply-subtract over the FMA4MODEF4 modes, expressed as
;;   operand 0 = ((-operand 1) * operand 2) - operand 3.
;; Gated on TARGET_FMA4 && TARGET_FUSED_MADD.
1787 (define_insn "fma4_fnmsub<mode>4256"
1788 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1789 (minus:FMA4MODEF4
1790 (mult:FMA4MODEF4
1791 (neg:FMA4MODEF4
1792 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1793 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1794 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1795 "TARGET_FMA4 && TARGET_FUSED_MADD"
1796 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1797 [(set_attr "type" "ssemuladd")
1798 (set_attr "mode" "<MODE>")])
1799
;; FMA4 multiply-add over the SSEMODEF4 modes (iterator defined outside this
;; chunk): operand 0 = (operand 1 * operand 2) + operand 3.
;; Same operand/constraint layout as the FMA4MODEF4 variant: memory only for
;; operand 3 (alt 0) or operand 2 (alt 1).
;; Gated on TARGET_FMA4 && TARGET_FUSED_MADD.
1800 (define_insn "fma4_fmadd<mode>4"
1801 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1802 (plus:SSEMODEF4
1803 (mult:SSEMODEF4
1804 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1805 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1806 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1807 "TARGET_FMA4 && TARGET_FUSED_MADD"
1808 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1809 [(set_attr "type" "ssemuladd")
1810 (set_attr "mode" "<MODE>")])
1811
1812 ;; For the scalar operations, use operand1 for the upper words that aren't
1813 ;; modified, so restrict the forms that are generated.
1814 ;; Scalar version of fmadd.
;; Scalar ("vm") multiply-add on SSEMODEF2P vectors.  The vec_merge with mask
;; (const_int 1) takes element 0 from (operand 1 * operand 2) + operand 3 and
;; every other element from (match_dup 0), i.e. the destination itself.
;; NOTE(review): nearby commentary speaks of operand1 supplying the untouched
;; upper words, whereas the RTL merges with operand 0 -- confirm which matches
;; the FMA4 scalar instruction semantics.
1815 (define_insn "fma4_vmfmadd<mode>4"
1816 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1817 (vec_merge:SSEMODEF2P
1818 (plus:SSEMODEF2P
1819 (mult:SSEMODEF2P
1820 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1821 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1822 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1823 (match_dup 0)
1824 (const_int 1)))]
1825 "TARGET_FMA4 && TARGET_FUSED_MADD"
1826 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1827 [(set_attr "type" "ssemuladd")
1828 (set_attr "mode" "<MODE>")])
1829
1830 ;; Floating multiply and subtract.
1831 ;; Allow two memory operands the same as fmadd.
;; FMA4 multiply-subtract over the SSEMODEF4 modes:
;;   operand 0 = (operand 1 * operand 2) - operand 3.
;; Memory is permitted for operand 3 (alt 0) or operand 2 (alt 1) only.
;; Gated on TARGET_FMA4 && TARGET_FUSED_MADD.
1832 (define_insn "fma4_fmsub<mode>4"
1833 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1834 (minus:SSEMODEF4
1835 (mult:SSEMODEF4
1836 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1837 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1838 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1839 "TARGET_FMA4 && TARGET_FUSED_MADD"
1840 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1841 [(set_attr "type" "ssemuladd")
1842 (set_attr "mode" "<MODE>")])
1843
1844 ;; For the scalar operations, use operand1 for the upper words that aren't
1845 ;; modified, so restrict the forms that are generated.
1846 ;; Scalar version of fmsub.
;; Scalar ("vm") multiply-subtract on SSEMODEF2P vectors.  The vec_merge with
;; mask (const_int 1) takes element 0 from
;; (operand 1 * operand 2) - operand 3 and all remaining elements from
;; (match_dup 0), i.e. the destination itself.
1847 (define_insn "fma4_vmfmsub<mode>4"
1848 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1849 (vec_merge:SSEMODEF2P
1850 (minus:SSEMODEF2P
1851 (mult:SSEMODEF2P
1852 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1853 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1854 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1855 (match_dup 0)
1856 (const_int 1)))]
1857 "TARGET_FMA4 && TARGET_FUSED_MADD"
1858 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1859 [(set_attr "type" "ssemuladd")
1860 (set_attr "mode" "<MODE>")])
1861
1862 ;; Floating point negative multiply and add.
1863 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
;; FMA4 negated multiply-add over the SSEMODEF4 modes, expressed as
;;   operand 0 = operand 3 - (operand 1 * operand 2).
;; Operand 3 is listed first in the RTL but printed last in the template.
;; Gated on TARGET_FMA4 && TARGET_FUSED_MADD.
1864 (define_insn "fma4_fnmadd<mode>4"
1865 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1866 (minus:SSEMODEF4
1867 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")
1868 (mult:SSEMODEF4
1869 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1870 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))))]
1871 "TARGET_FMA4 && TARGET_FUSED_MADD"
1872 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1873 [(set_attr "type" "ssemuladd")
1874 (set_attr "mode" "<MODE>")])
1875
1876 ;; For the scalar operations, use operand1 for the upper words that aren't
1877 ;; modified, so restrict the forms that are generated.
1878 ;; Scalar version of fnmadd.
;; Scalar ("vm") negated multiply-add on SSEMODEF2P vectors.  The vec_merge
;; with mask (const_int 1) takes element 0 from
;; operand 3 - (operand 1 * operand 2) and all remaining elements from
;; (match_dup 0), i.e. the destination itself.
1879 (define_insn "fma4_vmfnmadd<mode>4"
1880 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1881 (vec_merge:SSEMODEF2P
1882 (minus:SSEMODEF2P
1883 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1884 (mult:SSEMODEF2P
1885 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1886 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
1887 (match_dup 0)
1888 (const_int 1)))]
1889 "TARGET_FMA4 && TARGET_FUSED_MADD"
1890 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1891 [(set_attr "type" "ssemuladd")
1892 (set_attr "mode" "<MODE>")])
1893
1894 ;; Floating point negative multiply and subtract.
1895 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c.
;; FMA4 negated multiply-subtract over the SSEMODEF4 modes, expressed as
;;   operand 0 = ((-operand 1) * operand 2) - operand 3.
;; Gated on TARGET_FMA4 && TARGET_FUSED_MADD.
1896 (define_insn "fma4_fnmsub<mode>4"
1897 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1898 (minus:SSEMODEF4
1899 (mult:SSEMODEF4
1900 (neg:SSEMODEF4
1901 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x"))
1902 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1903 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1904 "TARGET_FMA4 && TARGET_FUSED_MADD"
1905 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1906 [(set_attr "type" "ssemuladd")
1907 (set_attr "mode" "<MODE>")])
1908
1909 ;; For the scalar operations, use operand1 for the upper words that aren't
1910 ;; modified, so restrict the forms that are generated.
1911 ;; Scalar version of fnmsub.
;; Scalar ("vm") negated multiply-subtract on SSEMODEF2P vectors.  The
;; vec_merge with mask (const_int 1) takes element 0 from
;; ((-operand 1) * operand 2) - operand 3 and all remaining elements from
;; (match_dup 0), i.e. the destination itself.
1912 (define_insn "fma4_vmfnmsub<mode>4"
1913 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1914 (vec_merge:SSEMODEF2P
1915 (minus:SSEMODEF2P
1916 (mult:SSEMODEF2P
1917 (neg:SSEMODEF2P
1918 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1919 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1920 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1921 (match_dup 0)
1922 (const_int 1)))]
1923 "TARGET_FMA4 && TARGET_FUSED_MADD"
1924 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1925 [(set_attr "type" "ssemuladd")
1926 (set_attr "mode" "<MODE>")])
1927
;; UNSPEC_FMA4_INTRINSIC-wrapped multiply-add over the FMA4MODEF4 modes:
;;   (operand 1 * operand 2) + operand 3.
;; Unlike the plain fma4_* pattern of the same shape, this one is gated on
;; TARGET_FMA4 alone (no TARGET_FUSED_MADD requirement).
1928 (define_insn "fma4i_fmadd<mode>4256"
1929 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1930 (unspec:FMA4MODEF4
1931 [(plus:FMA4MODEF4
1932 (mult:FMA4MODEF4
1933 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1934 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1935 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1936 UNSPEC_FMA4_INTRINSIC))]
1937 "TARGET_FMA4"
1938 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1939 [(set_attr "type" "ssemuladd")
1940 (set_attr "mode" "<MODE>")])
1941
;; UNSPEC_FMA4_INTRINSIC-wrapped multiply-subtract over the FMA4MODEF4 modes:
;;   (operand 1 * operand 2) - operand 3.
;; Gated on TARGET_FMA4 alone.
1942 (define_insn "fma4i_fmsub<mode>4256"
1943 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1944 (unspec:FMA4MODEF4
1945 [(minus:FMA4MODEF4
1946 (mult:FMA4MODEF4
1947 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1948 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1949 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1950 UNSPEC_FMA4_INTRINSIC))]
1951 "TARGET_FMA4"
1952 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1953 [(set_attr "type" "ssemuladd")
1954 (set_attr "mode" "<MODE>")])
1955
;; UNSPEC_FMA4_INTRINSIC-wrapped negated multiply-add over the FMA4MODEF4
;; modes: operand 3 - (operand 1 * operand 2).
;; Gated on TARGET_FMA4 alone.
1956 (define_insn "fma4i_fnmadd<mode>4256"
1957 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1958 (unspec:FMA4MODEF4
1959 [(minus:FMA4MODEF4
1960 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1961 (mult:FMA4MODEF4
1962 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1963 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")))]
1964 UNSPEC_FMA4_INTRINSIC))]
1965 "TARGET_FMA4"
1966 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1967 [(set_attr "type" "ssemuladd")
1968 (set_attr "mode" "<MODE>")])
1969
;; UNSPEC_FMA4_INTRINSIC-wrapped negated multiply-subtract over the
;; FMA4MODEF4 modes: ((-operand 1) * operand 2) - operand 3.
;; Gated on TARGET_FMA4 alone.
1970 (define_insn "fma4i_fnmsub<mode>4256"
1971 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1972 (unspec:FMA4MODEF4
1973 [(minus:FMA4MODEF4
1974 (mult:FMA4MODEF4
1975 (neg:FMA4MODEF4
1976 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1977 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1978 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1979 UNSPEC_FMA4_INTRINSIC))]
1980 "TARGET_FMA4"
1981 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1982 [(set_attr "type" "ssemuladd")
1983 (set_attr "mode" "<MODE>")])
1984
;; UNSPEC_FMA4_INTRINSIC-wrapped multiply-add over the SSEMODEF2P modes:
;;   (operand 1 * operand 2) + operand 3.
;; Gated on TARGET_FMA4 alone.  Note this iterates over SSEMODEF2P, whereas
;; the non-unspec fma4_fmadd<mode>4 pattern iterates over SSEMODEF4.
1985 (define_insn "fma4i_fmadd<mode>4"
1986 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1987 (unspec:SSEMODEF2P
1988 [(plus:SSEMODEF2P
1989 (mult:SSEMODEF2P
1990 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1991 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1992 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1993 UNSPEC_FMA4_INTRINSIC))]
1994 "TARGET_FMA4"
1995 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1996 [(set_attr "type" "ssemuladd")
1997 (set_attr "mode" "<MODE>")])
1998
;; UNSPEC_FMA4_INTRINSIC-wrapped multiply-subtract over the SSEMODEF2P modes:
;;   (operand 1 * operand 2) - operand 3.
;; Gated on TARGET_FMA4 alone.
1999 (define_insn "fma4i_fmsub<mode>4"
2000 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2001 (unspec:SSEMODEF2P
2002 [(minus:SSEMODEF2P
2003 (mult:SSEMODEF2P
2004 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2005 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2006 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2007 UNSPEC_FMA4_INTRINSIC))]
2008 "TARGET_FMA4"
2009 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2010 [(set_attr "type" "ssemuladd")
2011 (set_attr "mode" "<MODE>")])
2012
;; UNSPEC_FMA4_INTRINSIC-wrapped negated multiply-add over the SSEMODEF2P
;; modes: operand 3 - (operand 1 * operand 2).
;; Gated on TARGET_FMA4 alone.
2013 (define_insn "fma4i_fnmadd<mode>4"
2014 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2015 (unspec:SSEMODEF2P
2016 [(minus:SSEMODEF2P
2017 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2018 (mult:SSEMODEF2P
2019 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2020 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))]
2021 UNSPEC_FMA4_INTRINSIC))]
2022 "TARGET_FMA4"
2023 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2024 [(set_attr "type" "ssemuladd")
2025 (set_attr "mode" "<MODE>")])
2026
;; UNSPEC_FMA4_INTRINSIC-wrapped negated multiply-subtract over the
;; SSEMODEF2P modes: ((-operand 1) * operand 2) - operand 3.
;; Gated on TARGET_FMA4 alone.
2027 (define_insn "fma4i_fnmsub<mode>4"
2028 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2029 (unspec:SSEMODEF2P
2030 [(minus:SSEMODEF2P
2031 (mult:SSEMODEF2P
2032 (neg:SSEMODEF2P
2033 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2034 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2035 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2036 UNSPEC_FMA4_INTRINSIC))]
2037 "TARGET_FMA4"
2038 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2039 [(set_attr "type" "ssemuladd")
2040 (set_attr "mode" "<MODE>")])
2041
2042 ;; For the scalar operations, use operand1 for the upper words that aren't
2043 ;; modified, so restrict the forms that are accepted.
;; UNSPEC_FMA4_INTRINSIC-wrapped scalar multiply-add on SSEMODEF2P vectors.
;; Inside the unspec, vec_merge mask (const_int 1) selects element 0 from
;; (operand 1 * operand 2) + operand 3 and the other elements from
;; (match_dup 0).  Gated on TARGET_FMA4 alone; the "mode" attribute is the
;; scalar mode (<ssescalarmode>) here, unlike the fma4_vm* patterns.
2044 (define_insn "fma4i_vmfmadd<mode>4"
2045 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2046 (unspec:SSEMODEF2P
2047 [(vec_merge:SSEMODEF2P
2048 (plus:SSEMODEF2P
2049 (mult:SSEMODEF2P
2050 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2051 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2052 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2053 (match_dup 0)
2054 (const_int 1))]
2055 UNSPEC_FMA4_INTRINSIC))]
2056 "TARGET_FMA4"
2057 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2058 [(set_attr "type" "ssemuladd")
2059 (set_attr "mode" "<ssescalarmode>")])
2060
;; UNSPEC_FMA4_INTRINSIC-wrapped scalar multiply-subtract on SSEMODEF2P
;; vectors.  vec_merge mask (const_int 1) selects element 0 from
;; (operand 1 * operand 2) - operand 3 and the other elements from
;; (match_dup 0).  Gated on TARGET_FMA4 alone.
2061 (define_insn "fma4i_vmfmsub<mode>4"
2062 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2063 (unspec:SSEMODEF2P
2064 [(vec_merge:SSEMODEF2P
2065 (minus:SSEMODEF2P
2066 (mult:SSEMODEF2P
2067 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2068 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2069 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2070 (match_dup 0)
2071 (const_int 1))]
2072 UNSPEC_FMA4_INTRINSIC))]
2073 "TARGET_FMA4"
2074 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2075 [(set_attr "type" "ssemuladd")
2076 (set_attr "mode" "<ssescalarmode>")])
2077
;; UNSPEC_FMA4_INTRINSIC-wrapped scalar negated multiply-add on SSEMODEF2P
;; vectors.  vec_merge mask (const_int 1) selects element 0 from
;; operand 3 - (operand 1 * operand 2) and the other elements from
;; (match_dup 0).  Gated on TARGET_FMA4 alone.
2078 (define_insn "fma4i_vmfnmadd<mode>4"
2079 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2080 (unspec:SSEMODEF2P
2081 [(vec_merge:SSEMODEF2P
2082 (minus:SSEMODEF2P
2083 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2084 (mult:SSEMODEF2P
2085 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2086 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
2087 (match_dup 0)
2088 (const_int 1))]
2089 UNSPEC_FMA4_INTRINSIC))]
2090 "TARGET_FMA4"
2091 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2092 [(set_attr "type" "ssemuladd")
2093 (set_attr "mode" "<ssescalarmode>")])
2094
;; UNSPEC_FMA4_INTRINSIC-wrapped scalar negated multiply-subtract on
;; SSEMODEF2P vectors.  vec_merge mask (const_int 1) selects element 0 from
;; ((-operand 1) * operand 2) - operand 3 and the other elements from
;; (match_dup 0).  Gated on TARGET_FMA4 alone.
2095 (define_insn "fma4i_vmfnmsub<mode>4"
2096 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2097 (unspec:SSEMODEF2P
2098 [(vec_merge:SSEMODEF2P
2099 (minus:SSEMODEF2P
2100 (mult:SSEMODEF2P
2101 (neg:SSEMODEF2P
2102 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2103 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2104 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2105 (match_dup 0)
2106 (const_int 1))]
2107 UNSPEC_FMA4_INTRINSIC))]
2108 "TARGET_FMA4"
2109 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2110 [(set_attr "type" "ssemuladd")
2111 (set_attr "mode" "<ssescalarmode>")])
2112
2113 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2114 ;;
2115 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
2116 ;;
2117 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2118
;; V8SF alternating multiply add/subtract.  Both op1*op2 + op3 and
;; op1*op2 - op3 are described, and vec_merge mask 170 (binary 10101010)
;; takes elements 1,3,5,7 from the sum and elements 0,2,4,6 from the
;; difference.  Gated on TARGET_FMA4 && TARGET_FUSED_MADD.
2119 (define_insn "fma4_fmaddsubv8sf4"
2120 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2121 (vec_merge:V8SF
2122 (plus:V8SF
2123 (mult:V8SF
2124 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2125 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2126 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2127 (minus:V8SF
2128 (mult:V8SF
2129 (match_dup 1)
2130 (match_dup 2))
2131 (match_dup 3))
2132 (const_int 170)))]
2133 "TARGET_FMA4 && TARGET_FUSED_MADD"
2134 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2135 [(set_attr "type" "ssemuladd")
2136 (set_attr "mode" "V8SF")])
2137
;; V4DF alternating multiply add/subtract.  vec_merge mask 10 (binary 1010)
;; takes elements 1 and 3 from op1*op2 + op3 and elements 0 and 2 from
;; op1*op2 - op3.  Gated on TARGET_FMA4 && TARGET_FUSED_MADD.
2138 (define_insn "fma4_fmaddsubv4df4"
2139 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2140 (vec_merge:V4DF
2141 (plus:V4DF
2142 (mult:V4DF
2143 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2144 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2145 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2146 (minus:V4DF
2147 (mult:V4DF
2148 (match_dup 1)
2149 (match_dup 2))
2150 (match_dup 3))
2151 (const_int 10)))]
2152 "TARGET_FMA4 && TARGET_FUSED_MADD"
2153 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2154 [(set_attr "type" "ssemuladd")
2155 (set_attr "mode" "V4DF")])
2156
;; V4SF alternating multiply add/subtract.  vec_merge mask 10 (binary 1010)
;; takes elements 1 and 3 from op1*op2 + op3 and elements 0 and 2 from
;; op1*op2 - op3.  Gated on TARGET_FMA4 && TARGET_FUSED_MADD.
2157 (define_insn "fma4_fmaddsubv4sf4"
2158 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2159 (vec_merge:V4SF
2160 (plus:V4SF
2161 (mult:V4SF
2162 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2163 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2164 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2165 (minus:V4SF
2166 (mult:V4SF
2167 (match_dup 1)
2168 (match_dup 2))
2169 (match_dup 3))
2170 (const_int 10)))]
2171 "TARGET_FMA4 && TARGET_FUSED_MADD"
2172 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2173 [(set_attr "type" "ssemuladd")
2174 (set_attr "mode" "V4SF")])
2175
;; V2DF alternating multiply add/subtract.  vec_merge mask 2 (binary 10)
;; takes element 1 from op1*op2 + op3 and element 0 from op1*op2 - op3.
;; Gated on TARGET_FMA4 && TARGET_FUSED_MADD.
2176 (define_insn "fma4_fmaddsubv2df4"
2177 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2178 (vec_merge:V2DF
2179 (plus:V2DF
2180 (mult:V2DF
2181 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2182 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2183 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2184 (minus:V2DF
2185 (mult:V2DF
2186 (match_dup 1)
2187 (match_dup 2))
2188 (match_dup 3))
2189 (const_int 2)))]
2190 "TARGET_FMA4 && TARGET_FUSED_MADD"
2191 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2192 [(set_attr "type" "ssemuladd")
2193 (set_attr "mode" "V2DF")])
2194
;; V8SF alternating multiply subtract/add.  vec_merge mask 85 (binary
;; 01010101) takes elements 0,2,4,6 from op1*op2 + op3 and elements 1,3,5,7
;; from op1*op2 - op3 -- the mirror image of the fmaddsub pattern.
;; Gated on TARGET_FMA4 && TARGET_FUSED_MADD.
2195 (define_insn "fma4_fmsubaddv8sf4"
2196 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2197 (vec_merge:V8SF
2198 (plus:V8SF
2199 (mult:V8SF
2200 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2201 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2202 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2203 (minus:V8SF
2204 (mult:V8SF
2205 (match_dup 1)
2206 (match_dup 2))
2207 (match_dup 3))
2208 (const_int 85)))]
2209 "TARGET_FMA4 && TARGET_FUSED_MADD"
2210 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2211 [(set_attr "type" "ssemuladd")
2212 (set_attr "mode" "V8SF")])
2213
;; V4DF alternating multiply subtract/add.  vec_merge mask 5 (binary 0101)
;; takes elements 0 and 2 from op1*op2 + op3 and elements 1 and 3 from
;; op1*op2 - op3.  Gated on TARGET_FMA4 && TARGET_FUSED_MADD.
2214 (define_insn "fma4_fmsubaddv4df4"
2215 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2216 (vec_merge:V4DF
2217 (plus:V4DF
2218 (mult:V4DF
2219 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2220 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2221 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2222 (minus:V4DF
2223 (mult:V4DF
2224 (match_dup 1)
2225 (match_dup 2))
2226 (match_dup 3))
2227 (const_int 5)))]
2228 "TARGET_FMA4 && TARGET_FUSED_MADD"
2229 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2230 [(set_attr "type" "ssemuladd")
2231 (set_attr "mode" "V4DF")])
2232
;; V4SF alternating multiply subtract/add.  vec_merge mask 5 (binary 0101)
;; takes elements 0 and 2 from op1*op2 + op3 and elements 1 and 3 from
;; op1*op2 - op3.  Gated on TARGET_FMA4 && TARGET_FUSED_MADD.
2233 (define_insn "fma4_fmsubaddv4sf4"
2234 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2235 (vec_merge:V4SF
2236 (plus:V4SF
2237 (mult:V4SF
2238 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2239 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2240 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2241 (minus:V4SF
2242 (mult:V4SF
2243 (match_dup 1)
2244 (match_dup 2))
2245 (match_dup 3))
2246 (const_int 5)))]
2247 "TARGET_FMA4 && TARGET_FUSED_MADD"
2248 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2249 [(set_attr "type" "ssemuladd")
2250 (set_attr "mode" "V4SF")])
2251
;; V2DF alternating multiply subtract/add.  vec_merge mask 1 takes element 0
;; from op1*op2 + op3 and element 1 from op1*op2 - op3.
;; Gated on TARGET_FMA4 && TARGET_FUSED_MADD.
2252 (define_insn "fma4_fmsubaddv2df4"
2253 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2254 (vec_merge:V2DF
2255 (plus:V2DF
2256 (mult:V2DF
2257 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2258 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2259 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2260 (minus:V2DF
2261 (mult:V2DF
2262 (match_dup 1)
2263 (match_dup 2))
2264 (match_dup 3))
2265 (const_int 1)))]
2266 "TARGET_FMA4 && TARGET_FUSED_MADD"
2267 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2268 [(set_attr "type" "ssemuladd")
2269 (set_attr "mode" "V2DF")])
2270
;; UNSPEC_FMA4_INTRINSIC-wrapped V8SF alternating multiply add/subtract.
;; vec_merge mask 170 (binary 10101010) takes elements 1,3,5,7 from
;; op1*op2 + op3 and elements 0,2,4,6 from op1*op2 - op3.
;; Gated on TARGET_FMA4 alone.
2271 (define_insn "fma4i_fmaddsubv8sf4"
2272 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2273 (unspec:V8SF
2274 [(vec_merge:V8SF
2275 (plus:V8SF
2276 (mult:V8SF
2277 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2278 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2279 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2280 (minus:V8SF
2281 (mult:V8SF
2282 (match_dup 1)
2283 (match_dup 2))
2284 (match_dup 3))
2285 (const_int 170))]
2286 UNSPEC_FMA4_INTRINSIC))]
2287 "TARGET_FMA4"
2288 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2289 [(set_attr "type" "ssemuladd")
2290 (set_attr "mode" "V8SF")])
2291
;; UNSPEC_FMA4_INTRINSIC-wrapped V4DF alternating multiply add/subtract.
;; vec_merge mask 10 (binary 1010) takes elements 1 and 3 from op1*op2 + op3
;; and elements 0 and 2 from op1*op2 - op3.  Gated on TARGET_FMA4 alone.
2292 (define_insn "fma4i_fmaddsubv4df4"
2293 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2294 (unspec:V4DF
2295 [(vec_merge:V4DF
2296 (plus:V4DF
2297 (mult:V4DF
2298 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2299 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2300 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2301 (minus:V4DF
2302 (mult:V4DF
2303 (match_dup 1)
2304 (match_dup 2))
2305 (match_dup 3))
2306 (const_int 10))]
2307 UNSPEC_FMA4_INTRINSIC))]
2308 "TARGET_FMA4"
2309 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2310 [(set_attr "type" "ssemuladd")
2311 (set_attr "mode" "V4DF")])
2312
;; UNSPEC_FMA4_INTRINSIC-wrapped V4SF alternating multiply add/subtract.
;; vec_merge mask 10 (binary 1010) takes elements 1 and 3 from op1*op2 + op3
;; and elements 0 and 2 from op1*op2 - op3.  Gated on TARGET_FMA4 alone.
2313 (define_insn "fma4i_fmaddsubv4sf4"
2314 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2315 (unspec:V4SF
2316 [(vec_merge:V4SF
2317 (plus:V4SF
2318 (mult:V4SF
2319 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2320 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2321 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2322 (minus:V4SF
2323 (mult:V4SF
2324 (match_dup 1)
2325 (match_dup 2))
2326 (match_dup 3))
2327 (const_int 10))]
2328 UNSPEC_FMA4_INTRINSIC))]
2329 "TARGET_FMA4"
2330 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2331 [(set_attr "type" "ssemuladd")
2332 (set_attr "mode" "V4SF")])
2333
;; UNSPEC_FMA4_INTRINSIC-wrapped V2DF alternating multiply add/subtract.
;; vec_merge mask 2 (binary 10) takes element 1 from op1*op2 + op3 and
;; element 0 from op1*op2 - op3.  Gated on TARGET_FMA4 alone.
2334 (define_insn "fma4i_fmaddsubv2df4"
2335 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2336 (unspec:V2DF
2337 [(vec_merge:V2DF
2338 (plus:V2DF
2339 (mult:V2DF
2340 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2341 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2342 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2343 (minus:V2DF
2344 (mult:V2DF
2345 (match_dup 1)
2346 (match_dup 2))
2347 (match_dup 3))
2348 (const_int 2))]
2349 UNSPEC_FMA4_INTRINSIC))]
2350 "TARGET_FMA4"
2351 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2352 [(set_attr "type" "ssemuladd")
2353 (set_attr "mode" "V2DF")])
2354
;; UNSPEC_FMA4_INTRINSIC-wrapped V8SF alternating multiply subtract/add.
;; vec_merge mask 85 (binary 01010101) takes elements 0,2,4,6 from
;; op1*op2 + op3 and elements 1,3,5,7 from op1*op2 - op3.
;; Gated on TARGET_FMA4 alone.
2355 (define_insn "fma4i_fmsubaddv8sf4"
2356 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2357 (unspec:V8SF
2358 [(vec_merge:V8SF
2359 (plus:V8SF
2360 (mult:V8SF
2361 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2362 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2363 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2364 (minus:V8SF
2365 (mult:V8SF
2366 (match_dup 1)
2367 (match_dup 2))
2368 (match_dup 3))
2369 (const_int 85))]
2370 UNSPEC_FMA4_INTRINSIC))]
2371 "TARGET_FMA4"
2372 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2373 [(set_attr "type" "ssemuladd")
2374 (set_attr "mode" "V8SF")])
2375
;; UNSPEC_FMA4_INTRINSIC-wrapped V4DF alternating multiply subtract/add.
;; vec_merge mask 5 (binary 0101) takes elements 0 and 2 from op1*op2 + op3
;; and elements 1 and 3 from op1*op2 - op3.  Gated on TARGET_FMA4 alone.
2376 (define_insn "fma4i_fmsubaddv4df4"
2377 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2378 (unspec:V4DF
2379 [(vec_merge:V4DF
2380 (plus:V4DF
2381 (mult:V4DF
2382 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2383 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2384 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2385 (minus:V4DF
2386 (mult:V4DF
2387 (match_dup 1)
2388 (match_dup 2))
2389 (match_dup 3))
2390 (const_int 5))]
2391 UNSPEC_FMA4_INTRINSIC))]
2392 "TARGET_FMA4"
2393 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2394 [(set_attr "type" "ssemuladd")
2395 (set_attr "mode" "V4DF")])
2396
;; UNSPEC_FMA4_INTRINSIC-wrapped V4SF alternating multiply subtract/add.
;; vec_merge mask 5 (binary 0101) takes elements 0 and 2 from op1*op2 + op3
;; and elements 1 and 3 from op1*op2 - op3.  Gated on TARGET_FMA4 alone.
2397 (define_insn "fma4i_fmsubaddv4sf4"
2398 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2399 (unspec:V4SF
2400 [(vec_merge:V4SF
2401 (plus:V4SF
2402 (mult:V4SF
2403 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2404 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2405 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2406 (minus:V4SF
2407 (mult:V4SF
2408 (match_dup 1)
2409 (match_dup 2))
2410 (match_dup 3))
2411 (const_int 5))]
2412 UNSPEC_FMA4_INTRINSIC))]
2413 "TARGET_FMA4"
2414 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2415 [(set_attr "type" "ssemuladd")
2416 (set_attr "mode" "V4SF")])
2417
;; UNSPEC_FMA4_INTRINSIC-wrapped V2DF alternating multiply subtract/add.
;; vec_merge mask 1 takes element 0 from op1*op2 + op3 and element 1 from
;; op1*op2 - op3.  Gated on TARGET_FMA4 alone.
2418 (define_insn "fma4i_fmsubaddv2df4"
2419 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2420 (unspec:V2DF
2421 [(vec_merge:V2DF
2422 (plus:V2DF
2423 (mult:V2DF
2424 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2425 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2426 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2427 (minus:V2DF
2428 (mult:V2DF
2429 (match_dup 1)
2430 (match_dup 2))
2431 (match_dup 3))
2432 (const_int 1))]
2433 UNSPEC_FMA4_INTRINSIC))]
2434 "TARGET_FMA4"
2435 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2436 [(set_attr "type" "ssemuladd")
2437 (set_attr "mode" "V2DF")])
2438
2439 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2440 ;;
2441 ;; Parallel single-precision floating point conversion operations
2442 ;;
2443 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2444
;; cvtpi2ps: convert the V2SI operand 2 (MMX register "y" or memory) to
;; floating point and merge it into elements 0 and 1 (mask 3) of operand 1;
;; the remaining elements come from operand 1, which is tied to the
;; destination by the "0" constraint.  Requires TARGET_SSE.
2445 (define_insn "sse_cvtpi2ps"
2446 [(set (match_operand:V4SF 0 "register_operand" "=x")
2447 (vec_merge:V4SF
2448 (vec_duplicate:V4SF
2449 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2450 (match_operand:V4SF 1 "register_operand" "0")
2451 (const_int 3)))]
2452 "TARGET_SSE"
2453 "cvtpi2ps\t{%2, %0|%0, %2}"
2454 [(set_attr "type" "ssecvt")
2455 (set_attr "mode" "V4SF")])
2456
;; cvtps2pi: float-to-integer conversion of V4SF operand 1 modelled as
;; UNSPEC_FIX_NOTRUNC, of which elements 0 and 1 are selected into a V2SI
;; MMX register ("y").  Requires TARGET_SSE; "unit" is marked as mmx.
2457 (define_insn "sse_cvtps2pi"
2458 [(set (match_operand:V2SI 0 "register_operand" "=y")
2459 (vec_select:V2SI
2460 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2461 UNSPEC_FIX_NOTRUNC)
2462 (parallel [(const_int 0) (const_int 1)])))]
2463 "TARGET_SSE"
2464 "cvtps2pi\t{%1, %0|%0, %1}"
2465 [(set_attr "type" "ssecvt")
2466 (set_attr "unit" "mmx")
2467 (set_attr "mode" "DI")])
2468
;; cvttps2pi: like sse_cvtps2pi in shape, but the conversion is expressed
;; with the RTL `fix' operator instead of UNSPEC_FIX_NOTRUNC.  Elements 0 and
;; 1 of the converted vector go to a V2SI MMX register.  Requires TARGET_SSE.
;; NOTE(review): "mode" is SF here but DI on sse_cvtps2pi -- confirm intended.
2469 (define_insn "sse_cvttps2pi"
2470 [(set (match_operand:V2SI 0 "register_operand" "=y")
2471 (vec_select:V2SI
2472 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2473 (parallel [(const_int 0) (const_int 1)])))]
2474 "TARGET_SSE"
2475 "cvttps2pi\t{%1, %0|%0, %1}"
2476 [(set_attr "type" "ssecvt")
2477 (set_attr "unit" "mmx")
2478 (set_attr "prefix_rep" "0")
2479 (set_attr "mode" "SF")])
2480
;; AVX vcvtsi2ss: convert SImode operand 2 (general register or memory) to
;; SF and place it in element 0 (mask 1) of the result; the other elements
;; come from operand 1, which is a separate source register in this
;; three-operand form.  Requires TARGET_AVX.
2481 (define_insn "*avx_cvtsi2ss"
2482 [(set (match_operand:V4SF 0 "register_operand" "=x")
2483 (vec_merge:V4SF
2484 (vec_duplicate:V4SF
2485 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2486 (match_operand:V4SF 1 "register_operand" "x")
2487 (const_int 1)))]
2488 "TARGET_AVX"
2489 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2490 [(set_attr "type" "sseicvt")
2491 (set_attr "prefix" "vex")
2492 (set_attr "mode" "SF")])
2493
;; SSE cvtsi2ss: convert SImode operand 2 to SF into element 0 (mask 1) of
;; operand 1, which is tied to the destination ("0").  The register and
;; memory sources are separate alternatives so that the athlon_decode and
;; amdfam10_decode attributes can differ per alternative.
2494 (define_insn "sse_cvtsi2ss"
2495 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2496 (vec_merge:V4SF
2497 (vec_duplicate:V4SF
2498 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2499 (match_operand:V4SF 1 "register_operand" "0,0")
2500 (const_int 1)))]
2501 "TARGET_SSE"
2502 "cvtsi2ss\t{%2, %0|%0, %2}"
2503 [(set_attr "type" "sseicvt")
2504 (set_attr "athlon_decode" "vector,double")
2505 (set_attr "amdfam10_decode" "vector,double")
2506 (set_attr "mode" "SF")])
2507
;; AVX vcvtsi2ssq: 64-bit (DImode) source variant of *avx_cvtsi2ss.  Operand 2
;; is converted to SF into element 0 (mask 1); other elements come from
;; operand 1.  Requires TARGET_AVX && TARGET_64BIT; length_vex is set to 4.
2508 (define_insn "*avx_cvtsi2ssq"
2509 [(set (match_operand:V4SF 0 "register_operand" "=x")
2510 (vec_merge:V4SF
2511 (vec_duplicate:V4SF
2512 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2513 (match_operand:V4SF 1 "register_operand" "x")
2514 (const_int 1)))]
2515 "TARGET_AVX && TARGET_64BIT"
2516 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2517 [(set_attr "type" "sseicvt")
2518 (set_attr "length_vex" "4")
2519 (set_attr "prefix" "vex")
2520 (set_attr "mode" "SF")])
2521
;; SSE cvtsi2ssq: 64-bit (DImode) source variant of sse_cvtsi2ss.  Operand 1
;; is tied to the destination; prefix_rex is forced to 1.
;; Requires TARGET_SSE && TARGET_64BIT.
;; NOTE(review): the second alternative's source constraint is "rm" whereas
;; sse_cvtsi2ss uses plain "m" -- confirm this overlap with alt 0 is intended.
2522 (define_insn "sse_cvtsi2ssq"
2523 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2524 (vec_merge:V4SF
2525 (vec_duplicate:V4SF
2526 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
2527 (match_operand:V4SF 1 "register_operand" "0,0")
2528 (const_int 1)))]
2529 "TARGET_SSE && TARGET_64BIT"
2530 "cvtsi2ssq\t{%2, %0|%0, %2}"
2531 [(set_attr "type" "sseicvt")
2532 (set_attr "prefix_rex" "1")
2533 (set_attr "athlon_decode" "vector,double")
2534 (set_attr "amdfam10_decode" "vector,double")
2535 (set_attr "mode" "SF")])
2536
;; cvtss2si: element 0 of V4SF operand 1 (SSE register or memory) converted
;; to an SImode general register, modelled as UNSPEC_FIX_NOTRUNC.
;; The template starts with %v and the prefix attribute is maybe_vex, so one
;; pattern serves both the legacy and the VEX encodings.
2537 (define_insn "sse_cvtss2si"
2538 [(set (match_operand:SI 0 "register_operand" "=r,r")
2539 (unspec:SI
2540 [(vec_select:SF
2541 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2542 (parallel [(const_int 0)]))]
2543 UNSPEC_FIX_NOTRUNC))]
2544 "TARGET_SSE"
2545 "%vcvtss2si\t{%1, %0|%0, %1}"
2546 [(set_attr "type" "sseicvt")
2547 (set_attr "athlon_decode" "double,vector")
2548 (set_attr "prefix_rep" "1")
2549 (set_attr "prefix" "maybe_vex")
2550 (set_attr "mode" "SI")])
2551
;; Variant of sse_cvtss2si whose source is a plain SFmode operand rather than
;; element 0 selected from a V4SF vector.  Same UNSPEC_FIX_NOTRUNC modelling
;; and same %vcvtss2si template; this one also sets amdfam10_decode.
2552 (define_insn "sse_cvtss2si_2"
2553 [(set (match_operand:SI 0 "register_operand" "=r,r")
2554 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2555 UNSPEC_FIX_NOTRUNC))]
2556 "TARGET_SSE"
2557 "%vcvtss2si\t{%1, %0|%0, %1}"
2558 [(set_attr "type" "sseicvt")
2559 (set_attr "athlon_decode" "double,vector")
2560 (set_attr "amdfam10_decode" "double,double")
2561 (set_attr "prefix_rep" "1")
2562 (set_attr "prefix" "maybe_vex")
2563 (set_attr "mode" "SI")])
2564
;; 64-bit destination form of sse_cvtss2si: element 0 of V4SF operand 1 is
;; converted (UNSPEC_FIX_NOTRUNC) into a DImode general register.
;; Requires TARGET_SSE && TARGET_64BIT; the {q} in the template supplies the
;; size suffix for one of the assembler dialects.
2565 (define_insn "sse_cvtss2siq"
2566 [(set (match_operand:DI 0 "register_operand" "=r,r")
2567 (unspec:DI
2568 [(vec_select:SF
2569 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2570 (parallel [(const_int 0)]))]
2571 UNSPEC_FIX_NOTRUNC))]
2572 "TARGET_SSE && TARGET_64BIT"
2573 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2574 [(set_attr "type" "sseicvt")
2575 (set_attr "athlon_decode" "double,vector")
2576 (set_attr "prefix_rep" "1")
2577 (set_attr "prefix" "maybe_vex")
2578 (set_attr "mode" "DI")])
2579
;; Variant of sse_cvtss2siq whose source is a plain SFmode operand rather
;; than element 0 of a V4SF vector.  DImode destination;
;; requires TARGET_SSE && TARGET_64BIT.
2580 (define_insn "sse_cvtss2siq_2"
2581 [(set (match_operand:DI 0 "register_operand" "=r,r")
2582 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2583 UNSPEC_FIX_NOTRUNC))]
2584 "TARGET_SSE && TARGET_64BIT"
2585 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2586 [(set_attr "type" "sseicvt")
2587 (set_attr "athlon_decode" "double,vector")
2588 (set_attr "amdfam10_decode" "double,double")
2589 (set_attr "prefix_rep" "1")
2590 (set_attr "prefix" "maybe_vex")
2591 (set_attr "mode" "DI")])
2592
;; cvttss2si: element 0 of V4SF operand 1 converted to an SImode general
;; register, expressed with the RTL `fix' operator (contrast with the
;; UNSPEC_FIX_NOTRUNC used by sse_cvtss2si).  Requires TARGET_SSE.
2593 (define_insn "sse_cvttss2si"
2594 [(set (match_operand:SI 0 "register_operand" "=r,r")
2595 (fix:SI
2596 (vec_select:SF
2597 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2598 (parallel [(const_int 0)]))))]
2599 "TARGET_SSE"
2600 "%vcvttss2si\t{%1, %0|%0, %1}"
2601 [(set_attr "type" "sseicvt")
2602 (set_attr "athlon_decode" "double,vector")
2603 (set_attr "amdfam10_decode" "double,double")
2604 (set_attr "prefix_rep" "1")
2605 (set_attr "prefix" "maybe_vex")
2606 (set_attr "mode" "SI")])
2607
;; 64-bit destination form of sse_cvttss2si: `fix' of element 0 of V4SF
;; operand 1 into a DImode general register.
;; Requires TARGET_SSE && TARGET_64BIT.
2608 (define_insn "sse_cvttss2siq"
2609 [(set (match_operand:DI 0 "register_operand" "=r,r")
2610 (fix:DI
2611 (vec_select:SF
2612 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2613 (parallel [(const_int 0)]))))]
2614 "TARGET_SSE && TARGET_64BIT"
2615 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2616 [(set_attr "type" "sseicvt")
2617 (set_attr "athlon_decode" "double,vector")
2618 (set_attr "amdfam10_decode" "double,double")
2619 (set_attr "prefix_rep" "1")
2620 (set_attr "prefix" "maybe_vex")
2621 (set_attr "mode" "DI")])
2622
;; AVX integer-to-float vector conversion, instantiated once per mode of
;; the AVXMODEDCVTDQ2PS iterator (defined outside this section).  The
;; source has the iterator's <avxcvtvecmode> and may be a register or
;; memory; the result is `float' of it.  Emits vcvtdq2ps.
2623 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2624 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2625 (float:AVXMODEDCVTDQ2PS
2626 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2627 "TARGET_AVX"
2628 "vcvtdq2ps\t{%1, %0|%0, %1}"
2629 [(set_attr "type" "ssecvt")
2630 (set_attr "prefix" "vex")
2631 (set_attr "mode" "<avxvecmode>")])
2632
;; SSE2 conversion of a V4SI vector (register or memory) to V4SF via
;; `float'.  Emits the non-VEX cvtdq2ps.  This named pattern is also
;; called directly by the vec_unpack*_float_*_v8hi expanders below.
2633 (define_insn "sse2_cvtdq2ps"
2634 [(set (match_operand:V4SF 0 "register_operand" "=x")
2635 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2636 "TARGET_SSE2"
2637 "cvtdq2ps\t{%1, %0|%0, %1}"
2638 [(set_attr "type" "ssecvt")
2639 (set_attr "mode" "V4SF")])
2640
;; Expander that builds a V4SI -> V4SF conversion out of four steps:
;;   op5 = float (op1)            -- signed conversion
;;   op6 = op5 < op3              -- op3 is the all-zero V4SF vector
;;   op7 = op6 & op4              -- op4 is built from the constant 2**32
;;   op0 = op5 + op7
;; i.e. 2**32 is added back to every element whose signed conversion came
;; out negative (the `u' in the name suggests this implements the unsigned
;; conversion).  The preparation code computes 2**32 with real_ldexp
;; (dconst1, 32), forces both constants into registers and allocates
;; fresh V4SF pseudos for operands 5..7.
;; NOTE(review): ix86_build_const_vector (SFmode, 1, x) presumably
;; replicates x into every element -- confirm against its definition.
2641 (define_expand "sse2_cvtudq2ps"
2642 [(set (match_dup 5)
2643 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2644 (set (match_dup 6)
2645 (lt:V4SF (match_dup 5) (match_dup 3)))
2646 (set (match_dup 7)
2647 (and:V4SF (match_dup 6) (match_dup 4)))
2648 (set (match_operand:V4SF 0 "register_operand" "")
2649 (plus:V4SF (match_dup 5) (match_dup 7)))]
2650 "TARGET_SSE2"
2651 {
2652 REAL_VALUE_TYPE TWO32r;
2653 rtx x;
2654 int i;
2655
2656 real_ldexp (&TWO32r, &dconst1, 32);
2657 x = const_double_from_real_value (TWO32r, SFmode);
2658
2659 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2660 operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
2661
2662 for (i = 5; i < 8; i++)
2663 operands[i] = gen_reg_rtx (V4SFmode);
2664 })
2665
;; AVX float-to-integer vector conversion, one instance per mode of the
;; AVXMODEDCVTPS2DQ iterator (defined outside this section).  Modeled as
;; UNSPEC_FIX_NOTRUNC, in contrast to the `fix'-based avx_cvttps2dq
;; pattern.  Emits vcvtps2dq.
2666 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2667 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2668 (unspec:AVXMODEDCVTPS2DQ
2669 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2670 UNSPEC_FIX_NOTRUNC))]
2671 "TARGET_AVX"
2672 "vcvtps2dq\t{%1, %0|%0, %1}"
2673 [(set_attr "type" "ssecvt")
2674 (set_attr "prefix" "vex")
2675 (set_attr "mode" "<avxvecmode>")])
2676
;; SSE2 V4SF -> V4SI conversion modeled as UNSPEC_FIX_NOTRUNC (the
;; truncating form is sse2_cvttps2dq).  Emits cvtps2dq; the insn is
;; marked as carrying a data16 prefix.
2677 (define_insn "sse2_cvtps2dq"
2678 [(set (match_operand:V4SI 0 "register_operand" "=x")
2679 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2680 UNSPEC_FIX_NOTRUNC))]
2681 "TARGET_SSE2"
2682 "cvtps2dq\t{%1, %0|%0, %1}"
2683 [(set_attr "type" "ssecvt")
2684 (set_attr "prefix_data16" "1")
2685 (set_attr "mode" "TI")])
2686
;; AVX truncating float-to-integer vector conversion (`fix'), one
;; instance per mode of the AVXMODEDCVTPS2DQ iterator.  Emits vcvttps2dq.
2687 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2688 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2689 (fix:AVXMODEDCVTPS2DQ
2690 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2691 "TARGET_AVX"
2692 "vcvttps2dq\t{%1, %0|%0, %1}"
2693 [(set_attr "type" "ssecvt")
2694 (set_attr "prefix" "vex")
2695 (set_attr "mode" "<avxvecmode>")])
2696
;; SSE2 truncating V4SF -> V4SI conversion (`fix').  Emits cvttps2dq.
;; The attributes declare a rep prefix and explicitly no data16 prefix
;; for this encoding.
2697 (define_insn "sse2_cvttps2dq"
2698 [(set (match_operand:V4SI 0 "register_operand" "=x")
2699 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2700 "TARGET_SSE2"
2701 "cvttps2dq\t{%1, %0|%0, %1}"
2702 [(set_attr "type" "ssecvt")
2703 (set_attr "prefix_rep" "1")
2704 (set_attr "prefix_data16" "0")
2705 (set_attr "mode" "TI")])
2706
2707 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2708 ;;
2709 ;; Parallel double-precision floating point conversion operations
2710 ;;
2711 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2712
;; Convert a V2SI operand to V2DF with `float'.  The source is either a
;; `y'-constraint register (first alternative) or memory (second).  Only
;; the first alternative is tagged unit "mmx" and given an explicit
;; prefix_data16 of 1; the memory alternative keeps the defaults (`*').
2713 (define_insn "sse2_cvtpi2pd"
2714 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2715 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2716 "TARGET_SSE2"
2717 "cvtpi2pd\t{%1, %0|%0, %1}"
2718 [(set_attr "type" "ssecvt")
2719 (set_attr "unit" "mmx,*")
2720 (set_attr "prefix_data16" "1,*")
2721 (set_attr "mode" "V2DF")])
2722
;; Convert a V2DF operand (register or memory) to a V2SI result held in a
;; `y'-constraint register.  Modeled as UNSPEC_FIX_NOTRUNC; the `fix'
;; counterpart is sse2_cvttpd2pi.  Emits cvtpd2pi.
2723 (define_insn "sse2_cvtpd2pi"
2724 [(set (match_operand:V2SI 0 "register_operand" "=y")
2725 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2726 UNSPEC_FIX_NOTRUNC))]
2727 "TARGET_SSE2"
2728 "cvtpd2pi\t{%1, %0|%0, %1}"
2729 [(set_attr "type" "ssecvt")
2730 (set_attr "unit" "mmx")
2731 (set_attr "prefix_data16" "1")
2732 (set_attr "mode" "DI")])
2733
;; Truncating (`fix') conversion of a V2DF operand to a V2SI result in a
;; `y'-constraint register.  Emits cvttpd2pi.
;; NOTE(review): the mode attribute is "TI" here but "DI" on the otherwise
;; parallel sse2_cvtpd2pi -- confirm whether the difference is intended.
2734 (define_insn "sse2_cvttpd2pi"
2735 [(set (match_operand:V2SI 0 "register_operand" "=y")
2736 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2737 "TARGET_SSE2"
2738 "cvttpd2pi\t{%1, %0|%0, %1}"
2739 [(set_attr "type" "ssecvt")
2740 (set_attr "unit" "mmx")
2741 (set_attr "prefix_data16" "1")
2742 (set_attr "mode" "TI")])
2743
;; AVX scalar int -> double insert.  vec_merge with mask 1 takes element 0
;; from `float:DF' of the SImode operand 2 (register or memory) and the
;; remaining element from operand 1.  Three-operand output, so operand 1
;; is not tied to the destination.
2744 (define_insn "*avx_cvtsi2sd"
2745 [(set (match_operand:V2DF 0 "register_operand" "=x")
2746 (vec_merge:V2DF
2747 (vec_duplicate:V2DF
2748 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2749 (match_operand:V2DF 1 "register_operand" "x")
2750 (const_int 1)))]
2751 "TARGET_AVX"
2752 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2753 [(set_attr "type" "sseicvt")
2754 (set_attr "prefix" "vex")
2755 (set_attr "mode" "DF")])
2756
;; SSE2 form of the scalar int -> double insert: same vec_merge (mask 1)
;; as *avx_cvtsi2sd, but operand 1 is tied to the destination ("0") and
;; the output is the two-operand cvtsi2sd.  The register and memory
;; sources are separate alternatives so the decode attributes can differ.
2757 (define_insn "sse2_cvtsi2sd"
2758 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2759 (vec_merge:V2DF
2760 (vec_duplicate:V2DF
2761 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2762 (match_operand:V2DF 1 "register_operand" "0,0")
2763 (const_int 1)))]
2764 "TARGET_SSE2"
2765 "cvtsi2sd\t{%2, %0|%0, %2}"
2766 [(set_attr "type" "sseicvt")
2767 (set_attr "mode" "DF")
2768 (set_attr "athlon_decode" "double,direct")
2769 (set_attr "amdfam10_decode" "vector,double")])
2770
;; 64-bit variant of *avx_cvtsi2sd: the integer source is DImode and the
;; pattern additionally requires TARGET_64BIT.  Emits vcvtsi2sdq.
;; NOTE(review): the `q' suffix is part of the mnemonic for both
;; assembler dialects here, whereas the cvtss2si patterns in this file
;; use the AT&T-only `{q}' form -- confirm this is intended.
2771 (define_insn "*avx_cvtsi2sdq"
2772 [(set (match_operand:V2DF 0 "register_operand" "=x")
2773 (vec_merge:V2DF
2774 (vec_duplicate:V2DF
2775 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2776 (match_operand:V2DF 1 "register_operand" "x")
2777 (const_int 1)))]
2778 "TARGET_AVX && TARGET_64BIT"
2779 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2780 [(set_attr "type" "sseicvt")
2781 (set_attr "length_vex" "4")
2782 (set_attr "prefix" "vex")
2783 (set_attr "mode" "DF")])
2784
;; 64-bit variant of sse2_cvtsi2sd: DImode integer source, operand 1 tied
;; to the destination, TARGET_64BIT required.  prefix_rex is set to 1.
;; NOTE(review): the `q' suffix is part of the mnemonic for both
;; assembler dialects here, whereas the cvtss2si patterns in this file
;; use the AT&T-only `{q}' form -- confirm this is intended.
2785 (define_insn "sse2_cvtsi2sdq"
2786 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2787 (vec_merge:V2DF
2788 (vec_duplicate:V2DF
2789 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2790 (match_operand:V2DF 1 "register_operand" "0,0")
2791 (const_int 1)))]
2792 "TARGET_SSE2 && TARGET_64BIT"
2793 "cvtsi2sdq\t{%2, %0|%0, %2}"
2794 [(set_attr "type" "sseicvt")
2795 (set_attr "prefix_rex" "1")
2796 (set_attr "mode" "DF")
2797 (set_attr "athlon_decode" "double,direct")
2798 (set_attr "amdfam10_decode" "vector,double")])
2799
;; Convert element 0 of a V2DF operand (register or memory alternative)
;; to an SImode general register.  Modeled as UNSPEC_FIX_NOTRUNC; the
;; `fix'-based counterpart is sse2_cvttsd2si.  Emits cvtsd2si.
2800 (define_insn "sse2_cvtsd2si"
2801 [(set (match_operand:SI 0 "register_operand" "=r,r")
2802 (unspec:SI
2803 [(vec_select:DF
2804 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2805 (parallel [(const_int 0)]))]
2806 UNSPEC_FIX_NOTRUNC))]
2807 "TARGET_SSE2"
2808 "%vcvtsd2si\t{%1, %0|%0, %1}"
2809 [(set_attr "type" "sseicvt")
2810 (set_attr "athlon_decode" "double,vector")
2811 (set_attr "prefix_rep" "1")
2812 (set_attr "prefix" "maybe_vex")
2813 (set_attr "mode" "SI")])
2814
;; Variant of sse2_cvtsd2si whose source is a plain DFmode scalar
;; (register or memory alternative) rather than element 0 of a V2DF
;; vector.  Same UNSPEC_FIX_NOTRUNC wrapper and output template.
2815 (define_insn "sse2_cvtsd2si_2"
2816 [(set (match_operand:SI 0 "register_operand" "=r,r")
2817 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2818 UNSPEC_FIX_NOTRUNC))]
2819 "TARGET_SSE2"
2820 "%vcvtsd2si\t{%1, %0|%0, %1}"
2821 [(set_attr "type" "sseicvt")
2822 (set_attr "athlon_decode" "double,vector")
2823 (set_attr "amdfam10_decode" "double,double")
2824 (set_attr "prefix_rep" "1")
2825 (set_attr "prefix" "maybe_vex")
2826 (set_attr "mode" "SI")])
2827
;; Convert element 0 of a V2DF operand (register or memory alternative)
;; to a DImode general register.  Modeled as UNSPEC_FIX_NOTRUNC; the
;; `fix'-based counterpart is sse2_cvttsd2siq.  Requires TARGET_64BIT.
;; The size suffix is written `{q}' so that it is printed for the AT&T
;; dialect only, matching sse_cvtss2siq; the AT&T output is unchanged and
;; the Intel-dialect output uses the unsuffixed mnemonic.
2828 (define_insn "sse2_cvtsd2siq"
2829 [(set (match_operand:DI 0 "register_operand" "=r,r")
2830 (unspec:DI
2831 [(vec_select:DF
2832 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2833 (parallel [(const_int 0)]))]
2834 UNSPEC_FIX_NOTRUNC))]
2835 "TARGET_SSE2 && TARGET_64BIT"
2836 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2837 [(set_attr "type" "sseicvt")
2838 (set_attr "athlon_decode" "double,vector")
2839 (set_attr "prefix_rep" "1")
2840 (set_attr "prefix" "maybe_vex")
2841 (set_attr "mode" "DI")])
2842
;; Variant of sse2_cvtsd2siq whose source is a plain DFmode scalar
;; (register or memory alternative) rather than element 0 of a V2DF
;; vector.  Requires TARGET_64BIT.
;; The size suffix is written `{q}' so that it is printed for the AT&T
;; dialect only, matching sse_cvtss2siq_2; the AT&T output is unchanged
;; and the Intel-dialect output uses the unsuffixed mnemonic.
2843 (define_insn "sse2_cvtsd2siq_2"
2844 [(set (match_operand:DI 0 "register_operand" "=r,r")
2845 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2846 UNSPEC_FIX_NOTRUNC))]
2847 "TARGET_SSE2 && TARGET_64BIT"
2848 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2849 [(set_attr "type" "sseicvt")
2850 (set_attr "athlon_decode" "double,vector")
2851 (set_attr "amdfam10_decode" "double,double")
2852 (set_attr "prefix_rep" "1")
2853 (set_attr "prefix" "maybe_vex")
2854 (set_attr "mode" "DI")])
2855
;; Truncating conversion: `fix:SI' of element 0 of a V2DF operand
;; (register or memory alternative) into an SImode general register.
;; Emits cvttsd2si.
2856 (define_insn "sse2_cvttsd2si"
2857 [(set (match_operand:SI 0 "register_operand" "=r,r")
2858 (fix:SI
2859 (vec_select:DF
2860 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2861 (parallel [(const_int 0)]))))]
2862 "TARGET_SSE2"
2863 "%vcvttsd2si\t{%1, %0|%0, %1}"
2864 [(set_attr "type" "sseicvt")
2865 (set_attr "prefix_rep" "1")
2866 (set_attr "prefix" "maybe_vex")
2867 (set_attr "mode" "SI")
2868 (set_attr "athlon_decode" "double,vector")
2869 (set_attr "amdfam10_decode" "double,double")])
2870
;; DImode counterpart of sse2_cvttsd2si: `fix:DI' of element 0 of a V2DF
;; operand into a 64-bit general register.  Requires TARGET_64BIT.
;; The size suffix is written `{q}' so that it is printed for the AT&T
;; dialect only, matching sse_cvttss2siq; the AT&T output is unchanged and
;; the Intel-dialect output uses the unsuffixed mnemonic.
2871 (define_insn "sse2_cvttsd2siq"
2872 [(set (match_operand:DI 0 "register_operand" "=r,r")
2873 (fix:DI
2874 (vec_select:DF
2875 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2876 (parallel [(const_int 0)]))))]
2877 "TARGET_SSE2 && TARGET_64BIT"
2878 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2879 [(set_attr "type" "sseicvt")
2880 (set_attr "prefix_rep" "1")
2881 (set_attr "prefix" "maybe_vex")
2882 (set_attr "mode" "DI")
2883 (set_attr "athlon_decode" "double,vector")
2884 (set_attr "amdfam10_decode" "double,double")])
2885
;; AVX conversion of all four elements of a V4SI operand (register or
;; memory) to V4DF with `float'.  Emits vcvtdq2pd.
2886 (define_insn "avx_cvtdq2pd256"
2887 [(set (match_operand:V4DF 0 "register_operand" "=x")
2888 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2889 "TARGET_AVX"
2890 "vcvtdq2pd\t{%1, %0|%0, %1}"
2891 [(set_attr "type" "ssecvt")
2892 (set_attr "prefix" "vex")
2893 (set_attr "mode" "V4DF")])
2894
;; Convert elements 0 and 1 of a V4SI operand (register or memory) to
;; V2DF with `float'; elements 2 and 3 of the source are not used.
;; The vec_unpacks_float_*_v4si expanders below generate this same RTL.
2895 (define_insn "sse2_cvtdq2pd"
2896 [(set (match_operand:V2DF 0 "register_operand" "=x")
2897 (float:V2DF
2898 (vec_select:V2SI
2899 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2900 (parallel [(const_int 0) (const_int 1)]))))]
2901 "TARGET_SSE2"
2902 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2903 [(set_attr "type" "ssecvt")
2904 (set_attr "prefix" "maybe_vex")
2905 (set_attr "mode" "V2DF")])
2906
;; AVX V4DF -> V4SI conversion modeled as UNSPEC_FIX_NOTRUNC (the `fix'
;; form is avx_cvttpd2dq256).  The `{y}' suffix is printed for the AT&T
;; dialect only.
2907 (define_insn "avx_cvtpd2dq256"
2908 [(set (match_operand:V4SI 0 "register_operand" "=x")
2909 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2910 UNSPEC_FIX_NOTRUNC))]
2911 "TARGET_AVX"
2912 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2913 [(set_attr "type" "ssecvt")
2914 (set_attr "prefix" "vex")
2915 (set_attr "mode" "OI")])
2916
;; Expander for the V2DF -> V4SI conversion.  The result is the
;; vec_concat of the two converted elements (UNSPEC_FIX_NOTRUNC, V2SI)
;; with operand 2, which the preparation statement sets to the V2SI zero
;; constant, so the upper two result elements are zero.  The RTL is
;; matched by the *sse2_cvtpd2dq insn.
2917 (define_expand "sse2_cvtpd2dq"
2918 [(set (match_operand:V4SI 0 "register_operand" "")
2919 (vec_concat:V4SI
2920 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2921 UNSPEC_FIX_NOTRUNC)
2922 (match_dup 2)))]
2923 "TARGET_SSE2"
2924 "operands[2] = CONST0_RTX (V2SImode);")
2925
;; Insn matching the RTL produced by the sse2_cvtpd2dq expander: operand 2
;; must satisfy const0_operand.  The output statement picks the mnemonic
;; at output time: vcvtpd2dq (with an AT&T-only `{x}' suffix) when
;; TARGET_AVX is set, plain cvtpd2dq otherwise.
2926 (define_insn "*sse2_cvtpd2dq"
2927 [(set (match_operand:V4SI 0 "register_operand" "=x")
2928 (vec_concat:V4SI
2929 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2930 UNSPEC_FIX_NOTRUNC)
2931 (match_operand:V2SI 2 "const0_operand" "")))]
2932 "TARGET_SSE2"
2933 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2934 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2935 [(set_attr "type" "ssecvt")
2936 (set_attr "prefix_rep" "1")
2937 (set_attr "prefix_data16" "0")
2938 (set_attr "prefix" "maybe_vex")
2939 (set_attr "mode" "TI")
2940 (set_attr "amdfam10_decode" "double")])
2941
;; AVX truncating (`fix') V4DF -> V4SI conversion.  The `{y}' suffix is
;; printed for the AT&T dialect only.
2942 (define_insn "avx_cvttpd2dq256"
2943 [(set (match_operand:V4SI 0 "register_operand" "=x")
2944 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2945 "TARGET_AVX"
2946 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2947 [(set_attr "type" "ssecvt")
2948 (set_attr "prefix" "vex")
2949 (set_attr "mode" "OI")])
2950
;; Expander for the truncating V2DF -> V4SI conversion: vec_concat of
;; `fix:V2SI' of operand 1 with operand 2, which is set to the V2SI zero
;; constant.  Matched by the *sse2_cvttpd2dq insn; also called by
;; vec_pack_sfix_trunc_v2df.
2951 (define_expand "sse2_cvttpd2dq"
2952 [(set (match_operand:V4SI 0 "register_operand" "")
2953 (vec_concat:V4SI
2954 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2955 (match_dup 2)))]
2956 "TARGET_SSE2"
2957 "operands[2] = CONST0_RTX (V2SImode);")
2958
;; Insn matching the RTL produced by the sse2_cvttpd2dq expander (operand
;; 2 must satisfy const0_operand).  Emits vcvttpd2dq with an AT&T-only
;; `{x}' suffix when TARGET_AVX is set, cvttpd2dq otherwise.
2959 (define_insn "*sse2_cvttpd2dq"
2960 [(set (match_operand:V4SI 0 "register_operand" "=x")
2961 (vec_concat:V4SI
2962 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2963 (match_operand:V2SI 2 "const0_operand" "")))]
2964 "TARGET_SSE2"
2965 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2966 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2967 [(set_attr "type" "ssecvt")
2968 (set_attr "prefix" "maybe_vex")
2969 (set_attr "mode" "TI")
2970 (set_attr "amdfam10_decode" "double")])
2971
;; AVX scalar double -> single insert.  vec_merge with mask 1 takes
;; element 0 from the float_truncate of the V2DF operand 2 (register or
;; memory) and the other elements from operand 1.  Three-operand output;
;; operand 1 is not tied to the destination.
2972 (define_insn "*avx_cvtsd2ss"
2973 [(set (match_operand:V4SF 0 "register_operand" "=x")
2974 (vec_merge:V4SF
2975 (vec_duplicate:V4SF
2976 (float_truncate:V2SF
2977 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2978 (match_operand:V4SF 1 "register_operand" "x")
2979 (const_int 1)))]
2980 "TARGET_AVX"
2981 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2982 [(set_attr "type" "ssecvt")
2983 (set_attr "prefix" "vex")
2984 (set_attr "mode" "SF")])
2985
;; SSE2 form of the scalar double -> single insert: same vec_merge (mask
;; 1) as *avx_cvtsd2ss, with operand 1 tied to the destination and the
;; two-operand cvtsd2ss output.  Register and memory sources are separate
;; alternatives so the decode attributes can differ.
2986 (define_insn "sse2_cvtsd2ss"
2987 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2988 (vec_merge:V4SF
2989 (vec_duplicate:V4SF
2990 (float_truncate:V2SF
2991 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2992 (match_operand:V4SF 1 "register_operand" "0,0")
2993 (const_int 1)))]
2994 "TARGET_SSE2"
2995 "cvtsd2ss\t{%2, %0|%0, %2}"
2996 [(set_attr "type" "ssecvt")
2997 (set_attr "athlon_decode" "vector,double")
2998 (set_attr "amdfam10_decode" "vector,double")
2999 (set_attr "mode" "SF")])
3000
;; AVX scalar single -> double insert.  Elements 0 and 1 of the V4SF
;; operand 2 are float_extended to V2DF, and vec_merge with mask 1 keeps
;; only element 0 of that, taking the other element from operand 1.
;; Three-operand output; operand 1 is not tied to the destination.
3001 (define_insn "*avx_cvtss2sd"
3002 [(set (match_operand:V2DF 0 "register_operand" "=x")
3003 (vec_merge:V2DF
3004 (float_extend:V2DF
3005 (vec_select:V2SF
3006 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
3007 (parallel [(const_int 0) (const_int 1)])))
3008 (match_operand:V2DF 1 "register_operand" "x")
3009 (const_int 1)))]
3010 "TARGET_AVX"
3011 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
3012 [(set_attr "type" "ssecvt")
3013 (set_attr "prefix" "vex")
3014 (set_attr "mode" "DF")])
3015
;; SSE2 form of the scalar single -> double insert: same RTL shape as
;; *avx_cvtss2sd, with operand 1 tied to the destination and the
;; two-operand cvtss2sd output.
3016 (define_insn "sse2_cvtss2sd"
3017 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3018 (vec_merge:V2DF
3019 (float_extend:V2DF
3020 (vec_select:V2SF
3021 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
3022 (parallel [(const_int 0) (const_int 1)])))
3023 (match_operand:V2DF 1 "register_operand" "0,0")
3024 (const_int 1)))]
3025 "TARGET_SSE2"
3026 "cvtss2sd\t{%2, %0|%0, %2}"
3027 [(set_attr "type" "ssecvt")
3028 (set_attr "amdfam10_decode" "vector,double")
3029 (set_attr "mode" "DF")])
3030
;; AVX narrowing conversion of a V4DF operand (register or memory) to
;; V4SF with float_truncate.  The `{y}' suffix is printed for the AT&T
;; dialect only.
3031 (define_insn "avx_cvtpd2ps256"
3032 [(set (match_operand:V4SF 0 "register_operand" "=x")
3033 (float_truncate:V4SF
3034 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
3035 "TARGET_AVX"
3036 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
3037 [(set_attr "type" "ssecvt")
3038 (set_attr "prefix" "vex")
3039 (set_attr "mode" "V4SF")])
3040
;; Expander for the V2DF -> V4SF narrowing conversion: vec_concat of the
;; float_truncate:V2SF of operand 1 with operand 2, which is set to the
;; V2SF zero constant, so the upper two result elements are zero.
;; Matched by *sse2_cvtpd2ps; also called by vec_pack_trunc_v2df.
3041 (define_expand "sse2_cvtpd2ps"
3042 [(set (match_operand:V4SF 0 "register_operand" "")
3043 (vec_concat:V4SF
3044 (float_truncate:V2SF
3045 (match_operand:V2DF 1 "nonimmediate_operand" ""))
3046 (match_dup 2)))]
3047 "TARGET_SSE2"
3048 "operands[2] = CONST0_RTX (V2SFmode);")
3049
;; Insn matching the RTL produced by the sse2_cvtpd2ps expander (operand 2
;; must satisfy const0_operand).  Emits vcvtpd2ps with an AT&T-only `{x}'
;; suffix when TARGET_AVX is set, cvtpd2ps otherwise.
3050 (define_insn "*sse2_cvtpd2ps"
3051 [(set (match_operand:V4SF 0 "register_operand" "=x")
3052 (vec_concat:V4SF
3053 (float_truncate:V2SF
3054 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
3055 (match_operand:V2SF 2 "const0_operand" "")))]
3056 "TARGET_SSE2"
3057 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
3058 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
3059 [(set_attr "type" "ssecvt")
3060 (set_attr "prefix_data16" "1")
3061 (set_attr "prefix" "maybe_vex")
3062 (set_attr "mode" "V4SF")
3063 (set_attr "amdfam10_decode" "double")])
3064
;; AVX widening conversion of all four elements of a V4SF operand
;; (register or memory) to V4DF with float_extend.  Emits vcvtps2pd.
3065 (define_insn "avx_cvtps2pd256"
3066 [(set (match_operand:V4DF 0 "register_operand" "=x")
3067 (float_extend:V4DF
3068 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3069 "TARGET_AVX"
3070 "vcvtps2pd\t{%1, %0|%0, %1}"
3071 [(set_attr "type" "ssecvt")
3072 (set_attr "prefix" "vex")
3073 (set_attr "mode" "V4DF")])
3074
;; Widen elements 0 and 1 of a V4SF operand (register or memory) to V2DF
;; with float_extend; elements 2 and 3 of the source are not used.
;; prefix_data16 is explicitly forced to 0 for this insn.
;; The vec_unpacks_{hi,lo}_v4sf expanders below generate this same RTL.
3075 (define_insn "sse2_cvtps2pd"
3076 [(set (match_operand:V2DF 0 "register_operand" "=x")
3077 (float_extend:V2DF
3078 (vec_select:V2SF
3079 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3080 (parallel [(const_int 0) (const_int 1)]))))]
3081 "TARGET_SSE2"
3082 "%vcvtps2pd\t{%1, %0|%0, %1}"
3083 [(set_attr "type" "ssecvt")
3084 (set_attr "prefix" "maybe_vex")
3085 (set_attr "mode" "V2DF")
3086 (set_attr "prefix_data16" "0")
3087 (set_attr "amdfam10_decode" "direct")])
3088
;; Widen the high two elements of a V4SF operand to V2DF in two steps.
;; Step 1 uses the same [6 7 2 3] selection as the movhlps patterns:
;; indices 6 and 7 address elements 2 and 3 of operand 1 within the
;; vec_concat, so they land in elements 0 and 1 of the temporary
;; (operand 2, a fresh V4SF pseudo).  Elements 2 and 3 of the temporary
;; come from its own previous contents, which step 2 never reads.
;; Step 2 float_extends elements 0 and 1 of the temporary.
3089 (define_expand "vec_unpacks_hi_v4sf"
3090 [(set (match_dup 2)
3091 (vec_select:V4SF
3092 (vec_concat:V8SF
3093 (match_dup 2)
3094 (match_operand:V4SF 1 "nonimmediate_operand" ""))
3095 (parallel [(const_int 6)
3096 (const_int 7)
3097 (const_int 2)
3098 (const_int 3)])))
3099 (set (match_operand:V2DF 0 "register_operand" "")
3100 (float_extend:V2DF
3101 (vec_select:V2SF
3102 (match_dup 2)
3103 (parallel [(const_int 0) (const_int 1)]))))]
3104 "TARGET_SSE2"
3105 "operands[2] = gen_reg_rtx (V4SFmode);")
3106
;; Widen the low two elements of a V4SF operand to V2DF.  The RTL is the
;; same shape as the sse2_cvtps2pd insn pattern; no preparation code is
;; needed.
3107 (define_expand "vec_unpacks_lo_v4sf"
3108 [(set (match_operand:V2DF 0 "register_operand" "")
3109 (float_extend:V2DF
3110 (vec_select:V2SF
3111 (match_operand:V4SF 1 "nonimmediate_operand" "")
3112 (parallel [(const_int 0) (const_int 1)]))))]
3113 "TARGET_SSE2")
3114
;; Convert one half of a V8HI vector to V4SF.  The half is first widened
;; into a temporary V4SI pseudo with gen_vec_unpacks_hi_v8hi (defined
;; outside this section; by its name the signed high-half unpack), then
;; converted with sse2_cvtdq2ps.  The expander emits both insns itself
;; and finishes with DONE.
3115 (define_expand "vec_unpacks_float_hi_v8hi"
3116 [(match_operand:V4SF 0 "register_operand" "")
3117 (match_operand:V8HI 1 "register_operand" "")]
3118 "TARGET_SSE2"
3119 {
3120 rtx tmp = gen_reg_rtx (V4SImode);
3121
3122 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
3123 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
3124 DONE;
3125 })
3126
;; Same as vec_unpacks_float_hi_v8hi but widening through
;; gen_vec_unpacks_lo_v8hi (defined outside this section; by its name the
;; signed low-half unpack) before the sse2_cvtdq2ps conversion.
3127 (define_expand "vec_unpacks_float_lo_v8hi"
3128 [(match_operand:V4SF 0 "register_operand" "")
3129 (match_operand:V8HI 1 "register_operand" "")]
3130 "TARGET_SSE2"
3131 {
3132 rtx tmp = gen_reg_rtx (V4SImode);
3133
3134 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
3135 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
3136 DONE;
3137 })
3138
;; Unsigned counterpart of vec_unpacks_float_hi_v8hi: widens through
;; gen_vec_unpacku_hi_v8hi (defined outside this section) and then uses
;; the ordinary sse2_cvtdq2ps conversion.
;; NOTE(review): presumably the signed conversion is sufficient because a
;; zero-extended 16-bit value is never negative as a 32-bit integer --
;; confirm against the unpacku expander.
3139 (define_expand "vec_unpacku_float_hi_v8hi"
3140 [(match_operand:V4SF 0 "register_operand" "")
3141 (match_operand:V8HI 1 "register_operand" "")]
3142 "TARGET_SSE2"
3143 {
3144 rtx tmp = gen_reg_rtx (V4SImode);
3145
3146 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
3147 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
3148 DONE;
3149 })
3150
;; Unsigned counterpart of vec_unpacks_float_lo_v8hi: widens through
;; gen_vec_unpacku_lo_v8hi (defined outside this section) and then uses
;; the ordinary sse2_cvtdq2ps conversion.
;; NOTE(review): presumably the signed conversion is sufficient because a
;; zero-extended 16-bit value is never negative as a 32-bit integer --
;; confirm against the unpacku expander.
3151 (define_expand "vec_unpacku_float_lo_v8hi"
3152 [(match_operand:V4SF 0 "register_operand" "")
3153 (match_operand:V8HI 1 "register_operand" "")]
3154 "TARGET_SSE2"
3155 {
3156 rtx tmp = gen_reg_rtx (V4SImode);
3157
3158 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
3159 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
3160 DONE;
3161 })
3162
;; Convert the high two elements of a V4SI operand to V2DF in two steps.
;; Step 1 shuffles the source with selection [2 3 2 3] into a fresh V4SI
;; pseudo (operand 2), which puts elements 2 and 3 into positions 0 and 1.
;; Step 2 applies `float' to elements 0 and 1 of that temporary (the
;; sse2_cvtdq2pd RTL shape).
3163 (define_expand "vec_unpacks_float_hi_v4si"
3164 [(set (match_dup 2)
3165 (vec_select:V4SI
3166 (match_operand:V4SI 1 "nonimmediate_operand" "")
3167 (parallel [(const_int 2)
3168 (const_int 3)
3169 (const_int 2)
3170 (const_int 3)])))
3171 (set (match_operand:V2DF 0 "register_operand" "")
3172 (float:V2DF
3173 (vec_select:V2SI
3174 (match_dup 2)
3175 (parallel [(const_int 0) (const_int 1)]))))]
3176 "TARGET_SSE2"
3177 "operands[2] = gen_reg_rtx (V4SImode);")
3178
;; Convert the low two elements of a V4SI operand to V2DF.  The RTL is
;; the same shape as the sse2_cvtdq2pd insn pattern; no preparation code
;; is needed.
3179 (define_expand "vec_unpacks_float_lo_v4si"
3180 [(set (match_operand:V2DF 0 "register_operand" "")
3181 (float:V2DF
3182 (vec_select:V2SI
3183 (match_operand:V4SI 1 "nonimmediate_operand" "")
3184 (parallel [(const_int 0) (const_int 1)]))))]
3185 "TARGET_SSE2")
3186
;; Convert the high two elements of a V4SI operand to V2DF with a
;; correction for negative signed results:
;;   op5 = shuffle of op1 with selection [2 3 2 3]   (V4SI pseudo)
;;   op6 = float of elements 0,1 of op5              (signed conversion)
;;   op7 = op6 < op3          -- op3 is the all-zero V2DF vector
;;   op8 = op7 & op4          -- op4 is built from the constant 2**32
;;   op0 = op6 + op8
;; i.e. 2**32 is added to every element whose signed conversion came out
;; negative, which is how the unsigned interpretation is obtained.
;; The preparation code computes 2**32 with real_ldexp (dconst1, 32) and
;; allocates the temporaries (operand 5 in V4SI, operands 6..8 in V2DF).
;; NOTE(review): ix86_build_const_vector (DFmode, 1, x) presumably
;; replicates x into every element -- confirm against its definition.
3187 (define_expand "vec_unpacku_float_hi_v4si"
3188 [(set (match_dup 5)
3189 (vec_select:V4SI
3190 (match_operand:V4SI 1 "nonimmediate_operand" "")
3191 (parallel [(const_int 2)
3192 (const_int 3)
3193 (const_int 2)
3194 (const_int 3)])))
3195 (set (match_dup 6)
3196 (float:V2DF
3197 (vec_select:V2SI
3198 (match_dup 5)
3199 (parallel [(const_int 0) (const_int 1)]))))
3200 (set (match_dup 7)
3201 (lt:V2DF (match_dup 6) (match_dup 3)))
3202 (set (match_dup 8)
3203 (and:V2DF (match_dup 7) (match_dup 4)))
3204 (set (match_operand:V2DF 0 "register_operand" "")
3205 (plus:V2DF (match_dup 6) (match_dup 8)))]
3206 "TARGET_SSE2"
3207 {
3208 REAL_VALUE_TYPE TWO32r;
3209 rtx x;
3210 int i;
3211
3212 real_ldexp (&TWO32r, &dconst1, 32);
3213 x = const_double_from_real_value (TWO32r, DFmode);
3214
3215 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3216 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3217
3218 operands[5] = gen_reg_rtx (V4SImode);
3219
3220 for (i = 6; i < 9; i++)
3221 operands[i] = gen_reg_rtx (V2DFmode);
3222 })
3223
;; Convert the low two elements of a V4SI operand to V2DF with a
;; correction for negative signed results:
;;   op5 = float of elements 0,1 of op1   (signed conversion)
;;   op6 = op5 < op3          -- op3 is the all-zero V2DF vector
;;   op7 = op6 & op4          -- op4 is built from the constant 2**32
;;   op0 = op5 + op7
;; i.e. 2**32 is added to every element whose signed conversion came out
;; negative.  The preparation code computes 2**32 with real_ldexp
;; (dconst1, 32) and allocates V2DF pseudos for operands 5..7.
;; NOTE(review): ix86_build_const_vector (DFmode, 1, x) presumably
;; replicates x into every element -- confirm against its definition.
3224 (define_expand "vec_unpacku_float_lo_v4si"
3225 [(set (match_dup 5)
3226 (float:V2DF
3227 (vec_select:V2SI
3228 (match_operand:V4SI 1 "nonimmediate_operand" "")
3229 (parallel [(const_int 0) (const_int 1)]))))
3230 (set (match_dup 6)
3231 (lt:V2DF (match_dup 5) (match_dup 3)))
3232 (set (match_dup 7)
3233 (and:V2DF (match_dup 6) (match_dup 4)))
3234 (set (match_operand:V2DF 0 "register_operand" "")
3235 (plus:V2DF (match_dup 5) (match_dup 7)))]
3236 "TARGET_SSE2"
3237 {
3238 REAL_VALUE_TYPE TWO32r;
3239 rtx x;
3240 int i;
3241
3242 real_ldexp (&TWO32r, &dconst1, 32);
3243 x = const_double_from_real_value (TWO32r, DFmode);
3244
3245 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3246 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3247
3248 for (i = 5; i < 8; i++)
3249 operands[i] = gen_reg_rtx (V2DFmode);
3250 })
3251
;; Pack two V2DF vectors into one V4SF.  Each input is narrowed with
;; sse2_cvtpd2ps into its own V4SF temporary (converted values in
;; elements 0 and 1, zeros above, per that expander), and sse_movlhps
;; then combines elements 0,1 of r1 with elements 0,1 of r2 (its
;; selection is [0 1 4 5]).  The expander emits all insns itself and
;; finishes with DONE.
3252 (define_expand "vec_pack_trunc_v2df"
3253 [(match_operand:V4SF 0 "register_operand" "")
3254 (match_operand:V2DF 1 "nonimmediate_operand" "")
3255 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3256 "TARGET_SSE2"
3257 {
3258 rtx r1, r2;
3259
3260 r1 = gen_reg_rtx (V4SFmode);
3261 r2 = gen_reg_rtx (V4SFmode);
3262
3263 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3264 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3265 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
3266 DONE;
3267 })
3268
;; Pack two V2DF vectors into one V4SI using the truncating conversion.
;; Each input goes through sse2_cvttpd2dq into its own V4SI temporary
;; (converted values in elements 0 and 1, zeros above, per that
;; expander).  The temporaries and the destination are then viewed as
;; V2DI via gen_lowpart and combined with gen_vec_interleave_lowv2di
;; (defined outside this section; presumably it pairs the low 64-bit
;; halves of r1 and r2 -- confirm).
3269 (define_expand "vec_pack_sfix_trunc_v2df"
3270 [(match_operand:V4SI 0 "register_operand" "")
3271 (match_operand:V2DF 1 "nonimmediate_operand" "")
3272 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3273 "TARGET_SSE2"
3274 {
3275 rtx r1, r2;
3276
3277 r1 = gen_reg_rtx (V4SImode);
3278 r2 = gen_reg_rtx (V4SImode);
3279
3280 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3281 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3282 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3283 gen_lowpart (V2DImode, r1),
3284 gen_lowpart (V2DImode, r2)));
3285 DONE;
3286 })
3287
;; Same packing sequence as vec_pack_sfix_trunc_v2df, but each input is
;; converted with sse2_cvtpd2dq (the UNSPEC_FIX_NOTRUNC form) instead of
;; the truncating sse2_cvttpd2dq.  The two V4SI temporaries are combined
;; through gen_vec_interleave_lowv2di on their V2DI lowparts
;; (interleave pattern defined outside this section).
3288 (define_expand "vec_pack_sfix_v2df"
3289 [(match_operand:V4SI 0 "register_operand" "")
3290 (match_operand:V2DF 1 "nonimmediate_operand" "")
3291 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3292 "TARGET_SSE2"
3293 {
3294 rtx r1, r2;
3295
3296 r1 = gen_reg_rtx (V4SImode);
3297 r2 = gen_reg_rtx (V4SImode);
3298
3299 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3300 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3301 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3302 gen_lowpart (V2DImode, r1),
3303 gen_lowpart (V2DImode, r2)));
3304 DONE;
3305 })
3306
3307 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3308 ;;
3309 ;; Parallel single-precision floating point element swizzling
3310 ;;
3311 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3312
;; Expander for the movhlps shuffle that accepts nonimmediate operands
;; everywhere.  Selection [6 7 2 3] over the concatenation of operands 1
;; and 2 gives: result elements 0,1 = elements 2,3 of operand 2; result
;; elements 2,3 = elements 2,3 of operand 1.
;; The preparation statement hands the operands to
;; ix86_fixup_binary_operands (defined elsewhere; presumably it adjusts
;; them to what the insn patterns accept -- confirm).
3313 (define_expand "sse_movhlps_exp"
3314 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3315 (vec_select:V4SF
3316 (vec_concat:V8SF
3317 (match_operand:V4SF 1 "nonimmediate_operand" "")
3318 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3319 (parallel [(const_int 6)
3320 (const_int 7)
3321 (const_int 2)
3322 (const_int 3)])))]
3323 "TARGET_SSE"
3324 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
3325
;; AVX insn for the [6 7 2 3] shuffle.  Three alternatives:
;;   1. all registers                      -> vmovhlps
;;   2. operand 2 in offsettable memory    -> vmovlps reading %H2
;;   3. destination in memory, operand 1 tied to it, operand 2 a register
;;                                         -> vmovhps store
;; The insn condition rejects operands 1 and 2 both being memory.
;; NOTE(review): %H2 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the print-operand code.
3326 (define_insn "*avx_movhlps"
3327 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3328 (vec_select:V4SF
3329 (vec_concat:V8SF
3330 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3331 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3332 (parallel [(const_int 6)
3333 (const_int 7)
3334 (const_int 2)
3335 (const_int 3)])))]
3336 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3337 "@
3338 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3339 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3340 vmovhps\t{%2, %0|%0, %2}"
3341 [(set_attr "type" "ssemov")
3342 (set_attr "prefix" "vex")
3343 (set_attr "mode" "V4SF,V2SF,V2SF")])
3344
;; SSE insn for the [6 7 2 3] shuffle.  Same three alternatives as
;; *avx_movhlps (register/register, offsettable-memory source, memory
;; destination), but operand 1 is tied to the destination in every
;; alternative and the two-operand movhlps / movlps / movhps forms are
;; emitted.  The insn condition rejects operands 1 and 2 both being
;; memory.
3345 (define_insn "sse_movhlps"
3346 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3347 (vec_select:V4SF
3348 (vec_concat:V8SF
3349 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3350 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3351 (parallel [(const_int 6)
3352 (const_int 7)
3353 (const_int 2)
3354 (const_int 3)])))]
3355 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3356 "@
3357 movhlps\t{%2, %0|%0, %2}
3358 movlps\t{%H2, %0|%0, %H2}
3359 movhps\t{%2, %0|%0, %2}"
3360 [(set_attr "type" "ssemov")
3361 (set_attr "mode" "V4SF,V2SF,V2SF")])
3362
;; Expander for the movlhps shuffle that accepts nonimmediate operands
;; everywhere.  Selection [0 1 4 5] over the concatenation of operands 1
;; and 2 gives: result elements 0,1 = elements 0,1 of operand 1; result
;; elements 2,3 = elements 0,1 of operand 2.
;; The preparation statement hands the operands to
;; ix86_fixup_binary_operands (defined elsewhere; presumably it adjusts
;; them to what the insn patterns accept -- confirm).
3363 (define_expand "sse_movlhps_exp"
3364 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3365 (vec_select:V4SF
3366 (vec_concat:V8SF
3367 (match_operand:V4SF 1 "nonimmediate_operand" "")
3368 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3369 (parallel [(const_int 0)
3370 (const_int 1)
3371 (const_int 4)
3372 (const_int 5)])))]
3373 "TARGET_SSE"
3374 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
3375
;; AVX insn for the [0 1 4 5] shuffle.  Three alternatives:
;;   1. all registers                      -> vmovlhps
;;   2. operand 2 in memory                -> vmovhps load
;;   3. destination in offsettable memory, operand 1 tied to it,
;;      operand 2 a register               -> vmovlps store to %H0
;; The insn condition defers to ix86_binary_operator_ok (defined
;; elsewhere).
;; NOTE(review): %H0 presumably addresses the upper 8 bytes of the memory
;; destination -- confirm against the print-operand code.
3376 (define_insn "*avx_movlhps"
3377 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3378 (vec_select:V4SF
3379 (vec_concat:V8SF
3380 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3381 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3382 (parallel [(const_int 0)
3383 (const_int 1)
3384 (const_int 4)
3385 (const_int 5)])))]
3386 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3387 "@
3388 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3389 vmovhps\t{%2, %1, %0|%0, %1, %2}
3390 vmovlps\t{%2, %H0|%H0, %2}"
3391 [(set_attr "type" "ssemov")
3392 (set_attr "prefix" "vex")
3393 (set_attr "mode" "V4SF,V2SF,V2SF")])
3394
;; SSE insn for the [0 1 4 5] shuffle.  Same three alternatives as
;; *avx_movlhps (register/register, memory source, offsettable-memory
;; destination), but operand 1 is tied to the destination in every
;; alternative and the two-operand movlhps / movhps / movlps forms are
;; emitted.  This named pattern is called by vec_pack_trunc_v2df.
3395 (define_insn "sse_movlhps"
3396 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3397 (vec_select:V4SF
3398 (vec_concat:V8SF
3399 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3400 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3401 (parallel [(const_int 0)
3402 (const_int 1)
3403 (const_int 4)
3404 (const_int 5)])))]
3405 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3406 "@
3407 movlhps\t{%2, %0|%0, %2}
3408 movhps\t{%2, %0|%0, %2}
3409 movlps\t{%2, %H0|%H0, %2}"
3410 [(set_attr "type" "ssemov")
3411 (set_attr "mode" "V4SF,V2SF,V2SF")])
3412
3413 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit high interleave.  In the V16SF concatenation, indices 0..7
;; address operand 1 and 8..15 address operand 2, so the selection
;; [2 10 3 11 | 6 14 7 15] interleaves elements 2,3 of both operands into
;; the first four result elements and elements 6,7 of both into the last
;; four.  Emits vunpckhps.
3414 (define_insn "avx_unpckhps256"
3415 [(set (match_operand:V8SF 0 "register_operand" "=x")
3416 (vec_select:V8SF
3417 (vec_concat:V16SF
3418 (match_operand:V8SF 1 "register_operand" "x")
3419 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3420 (parallel [(const_int 2) (const_int 10)
3421 (const_int 3) (const_int 11)
3422 (const_int 6) (const_int 14)
3423 (const_int 7) (const_int 15)])))]
3424 "TARGET_AVX"
3425 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3426 [(set_attr "type" "sselog")
3427 (set_attr "prefix" "vex")
3428 (set_attr "mode" "V8SF")])
3429
;; AVX 128-bit high interleave: selection [2 6 3 7] over the
;; concatenation of operands 1 and 2 yields {op1[2], op2[2], op1[3],
;; op2[3]}.  Three-operand vunpckhps; operand 1 is not tied to the
;; destination.
3430 (define_insn "*avx_interleave_highv4sf"
3431 [(set (match_operand:V4SF 0 "register_operand" "=x")
3432 (vec_select:V4SF
3433 (vec_concat:V8SF
3434 (match_operand:V4SF 1 "register_operand" "x")
3435 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3436 (parallel [(const_int 2) (const_int 6)
3437 (const_int 3) (const_int 7)])))]
3438 "TARGET_AVX"
3439 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3440 [(set_attr "type" "sselog")
3441 (set_attr "prefix" "vex")
3442 (set_attr "mode" "V4SF")])
3443
;; SSE 128-bit high interleave: same [2 6 3 7] selection as
;; *avx_interleave_highv4sf, with operand 1 tied to the destination and
;; the two-operand unpckhps output.
3444 (define_insn "vec_interleave_highv4sf"
3445 [(set (match_operand:V4SF 0 "register_operand" "=x")
3446 (vec_select:V4SF
3447 (vec_concat:V8SF
3448 (match_operand:V4SF 1 "register_operand" "0")
3449 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3450 (parallel [(const_int 2) (const_int 6)
3451 (const_int 3) (const_int 7)])))]
3452 "TARGET_SSE"
3453 "unpckhps\t{%2, %0|%0, %2}"
3454 [(set_attr "type" "sselog")
3455 (set_attr "mode" "V4SF")])
3456
3457 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit low interleave.  In the V16SF concatenation, indices 0..7
;; address operand 1 and 8..15 address operand 2, so the selection
;; [0 8 1 9 | 4 12 5 13] interleaves elements 0,1 of both operands into
;; the first four result elements and elements 4,5 of both into the last
;; four.  Emits vunpcklps.
3458 (define_insn "avx_unpcklps256"
3459 [(set (match_operand:V8SF 0 "register_operand" "=x")
3460 (vec_select:V8SF
3461 (vec_concat:V16SF
3462 (match_operand:V8SF 1 "register_operand" "x")
3463 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3464 (parallel [(const_int 0) (const_int 8)
3465 (const_int 1) (const_int 9)
3466 (const_int 4) (const_int 12)
3467 (const_int 5) (const_int 13)])))]
3468 "TARGET_AVX"
3469 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3470 [(set_attr "type" "sselog")
3471 (set_attr "prefix" "vex")
3472 (set_attr "mode" "V8SF")])
3473
;; AVX 128-bit low interleave: selection [0 4 1 5] over the concatenation
;; of operands 1 and 2 yields {op1[0], op2[0], op1[1], op2[1]}.
;; Three-operand vunpcklps; operand 1 is not tied to the destination.
3474 (define_insn "*avx_interleave_lowv4sf"
3475 [(set (match_operand:V4SF 0 "register_operand" "=x")
3476 (vec_select:V4SF
3477 (vec_concat:V8SF
3478 (match_operand:V4SF 1 "register_operand" "x")
3479 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3480 (parallel [(const_int 0) (const_int 4)
3481 (const_int 1) (const_int 5)])))]
3482 "TARGET_AVX"
3483 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3484 [(set_attr "type" "sselog")
3485 (set_attr "prefix" "vex")
3486 (set_attr "mode" "V4SF")])
3487
;; SSE 128-bit low interleave: same [0 4 1 5] selection as
;; *avx_interleave_lowv4sf, with operand 1 tied to the destination and
;; the two-operand unpcklps output.
3488 (define_insn "vec_interleave_lowv4sf"
3489 [(set (match_operand:V4SF 0 "register_operand" "=x")
3490 (vec_select:V4SF
3491 (vec_concat:V8SF
3492 (match_operand:V4SF 1 "register_operand" "0")
3493 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3494 (parallel [(const_int 0) (const_int 4)
3495 (const_int 1) (const_int 5)])))]
3496 "TARGET_SSE"
3497 "unpcklps\t{%2, %0|%0, %2}"
3498 [(set_attr "type" "sselog")
3499 (set_attr "mode" "V4SF")])
3500
3501 ;; These are modeled with the same vec_concat as the others so that we
3502 ;; capture users of shufps that can use the new instructions
;; 256-bit duplicate of the odd-numbered elements.  Operand 1 is
;; concatenated with itself (match_dup 1) and every selected index is
;; below 8, so the result is {1,1,3,3,5,5,7,7} of operand 1.
;; Emits vmovshdup.
3503 (define_insn "avx_movshdup256"
3504 [(set (match_operand:V8SF 0 "register_operand" "=x")
3505 (vec_select:V8SF
3506 (vec_concat:V16SF
3507 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3508 (match_dup 1))
3509 (parallel [(const_int 1) (const_int 1)
3510 (const_int 3) (const_int 3)
3511 (const_int 5) (const_int 5)
3512 (const_int 7) (const_int 7)])))]
3513 "TARGET_AVX"
3514 "vmovshdup\t{%1, %0|%0, %1}"
3515 [(set_attr "type" "sse")
3516 (set_attr "prefix" "vex")
3517 (set_attr "mode" "V8SF")])
3518
;; 128-bit duplicate of the odd-numbered elements.  Operand 1 is
;; concatenated with itself, so index 7 addresses element 3 of the second
;; copy; the selection [1 1 7 7] therefore yields elements {1,1,3,3} of
;; operand 1.  Requires TARGET_SSE3.
3519 (define_insn "sse3_movshdup"
3520 [(set (match_operand:V4SF 0 "register_operand" "=x")
3521 (vec_select:V4SF
3522 (vec_concat:V8SF
3523 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3524 (match_dup 1))
3525 (parallel [(const_int 1)
3526 (const_int 1)
3527 (const_int 7)
3528 (const_int 7)])))]
3529 "TARGET_SSE3"
3530 "%vmovshdup\t{%1, %0|%0, %1}"
3531 [(set_attr "type" "sse")
3532 (set_attr "prefix_rep" "1")
3533 (set_attr "prefix" "maybe_vex")
3534 (set_attr "mode" "V4SF")])
3535
;; 256-bit duplicate of the even-numbered elements.  Operand 1 is
;; concatenated with itself (match_dup 1) and every selected index is
;; below 8, so the result is {0,0,2,2,4,4,6,6} of operand 1.
;; Emits vmovsldup.
3536 (define_insn "avx_movsldup256"
3537 [(set (match_operand:V8SF 0 "register_operand" "=x")
3538 (vec_select:V8SF
3539 (vec_concat:V16SF
3540 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3541 (match_dup 1))
3542 (parallel [(const_int 0) (const_int 0)
3543 (const_int 2) (const_int 2)
3544 (const_int 4) (const_int 4)
3545 (const_int 6) (const_int 6)])))]
3546 "TARGET_AVX"
3547 "vmovsldup\t{%1, %0|%0, %1}"
3548 [(set_attr "type" "sse")
3549 (set_attr "prefix" "vex")
3550 (set_attr "mode" "V8SF")])
3551
;; 128-bit duplicate of the even-numbered elements.  Operand 1 is
;; concatenated with itself, so index 6 addresses element 2 of the second
;; copy; the selection [0 0 6 6] therefore yields elements {0,0,2,2} of
;; operand 1.  Requires TARGET_SSE3.
3552 (define_insn "sse3_movsldup"
3553 [(set (match_operand:V4SF 0 "register_operand" "=x")
3554 (vec_select:V4SF
3555 (vec_concat:V8SF
3556 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3557 (match_dup 1))
3558 (parallel [(const_int 0)
3559 (const_int 0)
3560 (const_int 6)
3561 (const_int 6)])))]
3562 "TARGET_SSE3"
3563 "%vmovsldup\t{%1, %0|%0, %1}"
3564 [(set_attr "type" "sse")
3565 (set_attr "prefix_rep" "1")
3566 (set_attr "prefix" "maybe_vex")
3567 (set_attr "mode" "V4SF")])
3568
;; Expander for the 256-bit shufps.  Operand 3 is the integer shuffle
;; immediate; it is decoded into the eight explicit element selectors
;; expected by avx_shufps256_1.  Each 2-bit field of the mask is used
;; twice: once for the low four result elements and once, offset by 4,
;; for the high four.  Fields 0 and 1 index operand 1 (offsets +0 / +4);
;; fields 2 and 3 index operand 2, which starts at index 8 of the V16SF
;; concatenation (offsets +8 / +12).
3569 (define_expand "avx_shufps256"
3570 [(match_operand:V8SF 0 "register_operand" "")
3571 (match_operand:V8SF 1 "register_operand" "")
3572 (match_operand:V8SF 2 "nonimmediate_operand" "")
3573 (match_operand:SI 3 "const_int_operand" "")]
3574 "TARGET_AVX"
3575 {
3576 int mask = INTVAL (operands[3]);
3577 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3578 GEN_INT ((mask >> 0) & 3),
3579 GEN_INT ((mask >> 2) & 3),
3580 GEN_INT (((mask >> 4) & 3) + 8),
3581 GEN_INT (((mask >> 6) & 3) + 8),
3582 GEN_INT (((mask >> 0) & 3) + 4),
3583 GEN_INT (((mask >> 2) & 3) + 4),
3584 GEN_INT (((mask >> 4) & 3) + 12),
3585 GEN_INT (((mask >> 6) & 3) + 12)));
3586 DONE;
3587 })
3588
3589 ;; Each 2-bit field of the mask selects 2 elements, one in each 128-bit half.
;; 256-bit vshufps with explicit selectors.  Operands 3..10 index into
;; vec_concat (operand 1, operand 2): operands 3,4 and 7,8 pick from
;; operand 1, operands 5,6 and 9,10 pick from operand 2.
;; The insn condition only accepts selector sets where each selector for
;; the high four result elements (operands 7..10) equals the matching
;; selector for the low four (operands 3..6) plus 4, because a single
;; immediate has to describe both halves.
;; The output code rebuilds that immediate from operands 3..6 (removing
;; the +8 bias of the operand-2 selectors) and stores it in operands[3]
;; so that %3 prints it.
3590 (define_insn "avx_shufps256_1"
3591 [(set (match_operand:V8SF 0 "register_operand" "=x")
3592 (vec_select:V8SF
3593 (vec_concat:V16SF
3594 (match_operand:V8SF 1 "register_operand" "x")
3595 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3596 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3597 (match_operand 4 "const_0_to_3_operand" "")
3598 (match_operand 5 "const_8_to_11_operand" "")
3599 (match_operand 6 "const_8_to_11_operand" "")
3600 (match_operand 7 "const_4_to_7_operand" "")
3601 (match_operand 8 "const_4_to_7_operand" "")
3602 (match_operand 9 "const_12_to_15_operand" "")
3603 (match_operand 10 "const_12_to_15_operand" "")])))]
3604 "TARGET_AVX
3605 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3606 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3607 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3608 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3609 {
3610 int mask;
3611 mask = INTVAL (operands[3]);
3612 mask |= INTVAL (operands[4]) << 2;
3613 mask |= (INTVAL (operands[5]) - 8) << 4;
3614 mask |= (INTVAL (operands[6]) - 8) << 6;
3615 operands[3] = GEN_INT (mask);
3616
3617 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3618 }
3619 [(set_attr "type" "sselog")
3620 (set_attr "length_immediate" "1")
3621 (set_attr "prefix" "vex")
3622 (set_attr "mode" "V8SF")])
3623
;; Expander for the 128-bit shufps.  Operand 3 is the integer shuffle
;; immediate; its four 2-bit fields become explicit selectors passed to
;; gen_sse_shufps_v4sf.  Fields 0 and 1 index operand 1 directly; fields
;; 2 and 3 get +4 because operand 2 starts at index 4 of the concatenation.
3624 (define_expand "sse_shufps"
3625 [(match_operand:V4SF 0 "register_operand" "")
3626 (match_operand:V4SF 1 "register_operand" "")
3627 (match_operand:V4SF 2 "nonimmediate_operand" "")
3628 (match_operand:SI 3 "const_int_operand" "")]
3629 "TARGET_SSE"
3630 {
3631 int mask = INTVAL (operands[3]);
3632 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3633 GEN_INT ((mask >> 0) & 3),
3634 GEN_INT ((mask >> 2) & 3),
3635 GEN_INT (((mask >> 4) & 3) + 4),
3636 GEN_INT (((mask >> 6) & 3) + 4)));
3637 DONE;
3638 })
3639
;; AVX (three-operand) form of the 128-bit shufps for the SSEMODE4S modes.
;; Operands 3,4 select from operand 1 (indices 0..3) and operands 5,6
;; select from operand 2 (indices 4..7 of the concatenation).  The output
;; code packs the four selectors back into one immediate, removing the +4
;; bias of operands 5 and 6, and stores it in operands[3] for %3.
3640 (define_insn "*avx_shufps_<mode>"
3641 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3642 (vec_select:SSEMODE4S
3643 (vec_concat:<ssedoublesizemode>
3644 (match_operand:SSEMODE4S 1 "register_operand" "x")
3645 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3646 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3647 (match_operand 4 "const_0_to_3_operand" "")
3648 (match_operand 5 "const_4_to_7_operand" "")
3649 (match_operand 6 "const_4_to_7_operand" "")])))]
3650 "TARGET_AVX"
3651 {
3652 int mask = 0;
3653 mask |= INTVAL (operands[3]) << 0;
3654 mask |= INTVAL (operands[4]) << 2;
3655 mask |= (INTVAL (operands[5]) - 4) << 4;
3656 mask |= (INTVAL (operands[6]) - 4) << 6;
3657 operands[3] = GEN_INT (mask);
3658
3659 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3660 }
3661 [(set_attr "type" "sselog")
3662 (set_attr "length_immediate" "1")
3663 (set_attr "prefix" "vex")
3664 (set_attr "mode" "V4SF")])
3665
;; Legacy SSE (two-operand) shufps for the SSEMODE4S modes.  Operand 1 is
;; tied to the destination by the "0" constraint.  Operands 3,4 select
;; from operand 1 and operands 5,6 from operand 2 (indices 4..7).  The
;; output code packs the selectors into one immediate, removing the +4
;; bias of operands 5 and 6, and stores it in operands[3] for %3.
3666 (define_insn "sse_shufps_<mode>"
3667 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3668 (vec_select:SSEMODE4S
3669 (vec_concat:<ssedoublesizemode>
3670 (match_operand:SSEMODE4S 1 "register_operand" "0")
3671 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3672 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3673 (match_operand 4 "const_0_to_3_operand" "")
3674 (match_operand 5 "const_4_to_7_operand" "")
3675 (match_operand 6 "const_4_to_7_operand" "")])))]
3676 "TARGET_SSE"
3677 {
3678 int mask = 0;
3679 mask |= INTVAL (operands[3]) << 0;
3680 mask |= INTVAL (operands[4]) << 2;
3681 mask |= (INTVAL (operands[5]) - 4) << 4;
3682 mask |= (INTVAL (operands[6]) - 4) << 6;
3683 operands[3] = GEN_INT (mask);
3684
3685 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3686 }
3687 [(set_attr "type" "sselog")
3688 (set_attr "length_immediate" "1")
3689 (set_attr "mode" "V4SF")])
3690
;; Extract the high half of a V4SF: operand 0 (V2SF) receives elements
;; 2 and 3 of operand 1.  Alternatives, in constraint order:
;;   0: register -> memory, movhps
;;   1: register -> register, movhlps (destination printed with %d0)
;;   2: offsettable memory -> register, movlps from %H1
;; NOTE(review): %d0 and %H1 are operand-print modifiers defined outside
;; this chunk; %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the i386 print_operand code.
3691 (define_insn "sse_storehps"
3692 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3693 (vec_select:V2SF
3694 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3695 (parallel [(const_int 2) (const_int 3)])))]
3696 "TARGET_SSE"
3697 "@
3698 %vmovhps\t{%1, %0|%0, %1}
3699 %vmovhlps\t{%1, %d0|%d0, %1}
3700 %vmovlps\t{%H1, %d0|%d0, %H1}"
3701 [(set_attr "type" "ssemov")
3702 (set_attr "prefix" "maybe_vex")
3703 (set_attr "mode" "V2SF,V4SF,V2SF")])
3704
;; Expander for replacing the high half of a V4SF: the result is elements
;; 0 and 1 of operand 1 followed by the V2SF operand 2.  All operands are
;; accepted as nonimmediate_operand and are passed through
;; ix86_fixup_binary_operands before the pattern is emitted.
3705 (define_expand "sse_loadhps_exp"
3706 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3707 (vec_concat:V4SF
3708 (vec_select:V2SF
3709 (match_operand:V4SF 1 "nonimmediate_operand" "")
3710 (parallel [(const_int 0) (const_int 1)]))
3711 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3712 "TARGET_SSE"
3713 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
3714
;; AVX form of "replace the high half": result = low two elements of
;; operand 1 followed by V2SF operand 2.  Alternatives:
;;   0: operand 2 in memory, vmovhps into a register
;;   1: operand 2 in a register, vmovlhps
;;   2: destination is offsettable memory equal to operand 1 ("0");
;;      operand 2 is stored with vmovlps to %H0
;; NOTE(review): %H0 presumably addresses the upper 8 bytes of the
;; destination -- the modifier is defined outside this chunk.
3715 (define_insn "*avx_loadhps"
3716 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3717 (vec_concat:V4SF
3718 (vec_select:V2SF
3719 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3720 (parallel [(const_int 0) (const_int 1)]))
3721 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3722 "TARGET_AVX"
3723 "@
3724 vmovhps\t{%2, %1, %0|%0, %1, %2}
3725 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3726 vmovlps\t{%2, %H0|%H0, %2}"
3727 [(set_attr "type" "ssemov")
3728 (set_attr "prefix" "vex")
3729 (set_attr "mode" "V2SF,V4SF,V2SF")])
3730
;; Legacy SSE form of "replace the high half": result = low two elements
;; of operand 1 followed by V2SF operand 2.  Operand 1 is tied to the
;; destination in every alternative.  Alternatives:
;;   0: operand 2 in memory, movhps
;;   1: operand 2 in a register, movlhps
;;   2: destination in offsettable memory, operand 2 stored with movlps
;;      to %H0
3731 (define_insn "sse_loadhps"
3732 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3733 (vec_concat:V4SF
3734 (vec_select:V2SF
3735 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3736 (parallel [(const_int 0) (const_int 1)]))
3737 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3738 "TARGET_SSE"
3739 "@
3740 movhps\t{%2, %0|%0, %2}
3741 movlhps\t{%2, %0|%0, %2}
3742 movlps\t{%2, %0|%0, %2}"
3743 [(set_attr "type" "ssemov")
3744 (set_attr "mode" "V2SF,V4SF,V2SF")])
3745
;; AVX form of "extract the low half": operand 0 (V2SF) receives elements
;; 0 and 1 of V4SF operand 1.  Alternatives:
;;   0: register -> memory, vmovlps store
;;   1: register -> register, full-register vmovaps copy
;;   2: memory -> register, three-operand vmovlps load that names the
;;      destination as its own second source
;; The "mode" attribute of alternative 1 is V4SF: vmovaps is a
;; packed-single move, and sse_storelps uses V4SF for the same alternative.
3746 (define_insn "*avx_storelps"
3747 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3748 (vec_select:V2SF
3749 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3750 (parallel [(const_int 0) (const_int 1)])))]
3751 "TARGET_AVX"
3752 "@
3753 vmovlps\t{%1, %0|%0, %1}
3754 vmovaps\t{%1, %0|%0, %1}
3755 vmovlps\t{%1, %0, %0|%0, %0, %1}"
3756 [(set_attr "type" "ssemov")
3757 (set_attr "prefix" "vex")
3758 (set_attr "mode" "V2SF,V4SF,V2SF")])
3759
;; Legacy SSE form of "extract the low half": operand 0 (V2SF) receives
;; elements 0 and 1 of V4SF operand 1.  Alternatives:
;;   0: register -> memory, movlps store
;;   1: register -> register, full-register movaps copy
;;   2: memory -> register, movlps load
3760 (define_insn "sse_storelps"
3761 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3762 (vec_select:V2SF
3763 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3764 (parallel [(const_int 0) (const_int 1)])))]
3765 "TARGET_SSE"
3766 "@
3767 movlps\t{%1, %0|%0, %1}
3768 movaps\t{%1, %0|%0, %1}
3769 movlps\t{%1, %0|%0, %1}"
3770 [(set_attr "type" "ssemov")
3771 (set_attr "mode" "V2SF,V4SF,V2SF")])
3772
;; Expander for replacing the low half of a V4SF: the result is the V2SF
;; operand 2 followed by elements 2 and 3 of operand 1.  All operands are
;; accepted as nonimmediate_operand and are passed through
;; ix86_fixup_binary_operands before the pattern is emitted.
3773 (define_expand "sse_loadlps_exp"
3774 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3775 (vec_concat:V4SF
3776 (match_operand:V2SF 2 "nonimmediate_operand" "")
3777 (vec_select:V2SF
3778 (match_operand:V4SF 1 "nonimmediate_operand" "")
3779 (parallel [(const_int 2) (const_int 3)]))))]
3780 "TARGET_SSE"
3781 "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
3782
;; AVX form of "replace the low half": result = V2SF operand 2 followed by
;; elements 2 and 3 of V4SF operand 1.  Alternatives:
;;   0: both inputs in registers; vshufps with immediate 0xe4 takes
;;      elements 0,1 from operand 2 and elements 2,3 from operand 1
;;   1: operand 2 in memory, three-operand vmovlps load
;;   2: destination is memory equal to operand 1 ("0"); operand 2 is
;;      stored over its low half with vmovlps
;; Alternative 0 must use the VEX mnemonic vshufps: the template passes two
;; sources plus a destination and the insn is marked prefix "vex", a form
;; the legacy "shufps" mnemonic does not have.
3783 (define_insn "*avx_loadlps"
3784 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3785 (vec_concat:V4SF
3786 (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3787 (vec_select:V2SF
3788 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3789 (parallel [(const_int 2) (const_int 3)]))))]
3790 "TARGET_AVX"
3791 "@
3792 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3793 vmovlps\t{%2, %1, %0|%0, %1, %2}
3794 vmovlps\t{%2, %0|%0, %2}"
3795 [(set_attr "type" "sselog,ssemov,ssemov")
3796 (set_attr "length_immediate" "1,*,*")
3797 (set_attr "prefix" "vex")
3798 (set_attr "mode" "V4SF,V2SF,V2SF")])
3799
;; Legacy SSE form of "replace the low half": result = V2SF operand 2
;; followed by elements 2 and 3 of V4SF operand 1.  Alternatives:
;;   0: operand 2 is tied to the destination register and operand 1 is in
;;      a register; shufps with immediate 0xe4 merges them
;;   1: operand 1 tied to the destination, operand 2 loaded from memory
;;      with movlps
;;   2: destination is memory equal to operand 1; operand 2 is stored
;;      with movlps
3800 (define_insn "sse_loadlps"
3801 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3802 (vec_concat:V4SF
3803 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3804 (vec_select:V2SF
3805 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3806 (parallel [(const_int 2) (const_int 3)]))))]
3807 "TARGET_SSE"
3808 "@
3809 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3810 movlps\t{%2, %0|%0, %2}
3811 movlps\t{%2, %0|%0, %2}"
3812 [(set_attr "type" "sselog,ssemov,ssemov")
3813 (set_attr "length_immediate" "1,*,*")
3814 (set_attr "mode" "V4SF,V2SF,V2SF")])
3815
;; AVX register-to-register vmovss.  The vec_merge mask (const_int 1)
;; takes element 0 from operand 2 and the remaining elements from
;; operand 1; all three operands are separate registers.
3816 (define_insn "*avx_movss"
3817 [(set (match_operand:V4SF 0 "register_operand" "=x")
3818 (vec_merge:V4SF
3819 (match_operand:V4SF 2 "register_operand" "x")
3820 (match_operand:V4SF 1 "register_operand" "x")
3821 (const_int 1)))]
3822 "TARGET_AVX"
3823 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3824 [(set_attr "type" "ssemov")
3825 (set_attr "prefix" "vex")
3826 (set_attr "mode" "SF")])
3827
;; Legacy SSE register-to-register movss.  The vec_merge mask
;; (const_int 1) takes element 0 from operand 2 and the remaining
;; elements from operand 1, which is tied to the destination ("0").
3828 (define_insn "sse_movss"
3829 [(set (match_operand:V4SF 0 "register_operand" "=x")
3830 (vec_merge:V4SF
3831 (match_operand:V4SF 2 "register_operand" "x")
3832 (match_operand:V4SF 1 "register_operand" "0")
3833 (const_int 1)))]
3834 "TARGET_SSE"
3835 "movss\t{%2, %0|%0, %2}"
3836 [(set_attr "type" "ssemov")
3837 (set_attr "mode" "SF")])
3838
;; Expander for broadcasting an SF scalar (operand 1) into all four
;; elements of a V4SF register (operand 0).
;; Without AVX the only matching insn (*vec_dupv4sf) needs the scalar in
;; a register, so a memory operand is first copied into one.  The copy
;; must be made in SFmode, the mode of operand 1; forcing it into a
;; V4SFmode register would give the move source and destination
;; different modes.
;; With AVX, *vec_dupv4sf_avx accepts a memory source directly, so the
;; operand is left untouched.
3839 (define_expand "vec_dupv4sf"
3840 [(set (match_operand:V4SF 0 "register_operand" "")
3841 (vec_duplicate:V4SF
3842 (match_operand:SF 1 "nonimmediate_operand" "")))]
3843 "TARGET_SSE"
3844 {
3845 if (!TARGET_AVX)
3846 operands[1] = force_reg (SFmode, operands[1]);
3847 })
3848
;; AVX broadcast of an SF scalar into a V4SF register.  Alternatives:
;;   0: scalar in a register; vshufps with immediate 0 using the source
;;      register as both inputs
;;   1: scalar in memory; vbroadcastss
3849 (define_insn "*vec_dupv4sf_avx"
3850 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3851 (vec_duplicate:V4SF
3852 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3853 "TARGET_AVX"
3854 "@
3855 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3856 vbroadcastss\t{%1, %0|%0, %1}"
3857 [(set_attr "type" "sselog1,ssemov")
3858 (set_attr "length_immediate" "1,0")
3859 (set_attr "prefix_extra" "0,1")
3860 (set_attr "prefix" "vex")
3861 (set_attr "mode" "V4SF")])
3862
;; Legacy SSE broadcast of an SF scalar into a V4SF register.  The scalar
;; must already be in the destination register ("0"); shufps with
;; immediate 0 then shuffles that register with itself.
3863 (define_insn "*vec_dupv4sf"
3864 [(set (match_operand:V4SF 0 "register_operand" "=x")
3865 (vec_duplicate:V4SF
3866 (match_operand:SF 1 "register_operand" "0")))]
3867 "TARGET_SSE"
3868 "shufps\t{$0, %0, %0|%0, %0, 0}"
3869 [(set_attr "type" "sselog1")
3870 (set_attr "length_immediate" "1")
3871 (set_attr "mode" "V4SF")])
3872
;; AVX form of building a V2SF from two SF scalars (operand 1 low,
;; operand 2 high).  Alternatives:
;;   0: both scalars in SSE registers, vunpcklps
;;   1: operand 2 in memory, vinsertps with immediate 0x10
;;   2: operand 1 in memory and operand 2 matching constraint "C",
;;      vmovss load
;;   3: "y"-class destination, punpckldq
;;   4: "y"-class destination, operand 1 in memory and operand 2 matching
;;      "C", movd load
;; Alternatives 3 and 4 get prefix "orig"; the others get "vex".
;; NOTE(review): "C" is presumably the all-zero constant and "y" the MMX
;; register class -- both are defined outside this chunk.
3873 (define_insn "*vec_concatv2sf_avx"
3874 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3875 (vec_concat:V2SF
3876 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3877 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3878 "TARGET_AVX"
3879 "@
3880 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3881 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3882 vmovss\t{%1, %0|%0, %1}
3883 punpckldq\t{%2, %0|%0, %2}
3884 movd\t{%1, %0|%0, %1}"
3885 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3886 (set_attr "length_immediate" "*,1,*,*,*")
3887 (set_attr "prefix_extra" "*,1,*,*,*")
3888 (set (attr "prefix")
3889 (if_then_else (eq_attr "alternative" "3,4")
3890 (const_string "orig")
3891 (const_string "vex")))
3892 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3893
3894 ;; Although insertps takes register source, we prefer
3895 ;; unpcklps with register source since it is shorter.
;; SSE4.1 form of building a V2SF from two SF scalars (operand 1 low,
;; operand 2 high).  Operand 1 is tied to the destination wherever it is
;; a register.  Alternatives:
;;   0: operand 2 in a register, unpcklps
;;   1: operand 2 in memory, insertps with immediate 0x10
;;   2: operand 1 in memory and operand 2 matching "C", movss load
;;   3: "y"-class destination, punpckldq
;;   4: "y"-class destination, operand 1 in memory and operand 2 matching
;;      "C", movd load
3896 (define_insn "*vec_concatv2sf_sse4_1"
3897 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3898 (vec_concat:V2SF
3899 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3900 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3901 "TARGET_SSE4_1"
3902 "@
3903 unpcklps\t{%2, %0|%0, %2}
3904 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3905 movss\t{%1, %0|%0, %1}
3906 punpckldq\t{%2, %0|%0, %2}
3907 movd\t{%1, %0|%0, %1}"
3908 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3909 (set_attr "prefix_data16" "*,1,*,*,*")
3910 (set_attr "prefix_extra" "*,1,*,*,*")
3911 (set_attr "length_immediate" "*,1,*,*,*")
3912 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3913
3914 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3915 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3916 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Baseline SSE form of building a V2SF from two SF scalars.  Operand 2
;; is restricted to reg_or_0_operand, so it is never a memory operand.
;; Alternatives:
;;   0: operand 1 tied to the destination, operand 2 in a register,
;;      unpcklps
;;   1: operand 1 in memory and operand 2 matching "C", movss load
;;   2: "y"-class registers, punpckldq
;;   3: "y"-class destination, operand 1 in memory and operand 2 matching
;;      "C", movd load
3917 (define_insn "*vec_concatv2sf_sse"
3918 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3919 (vec_concat:V2SF
3920 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3921 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3922 "TARGET_SSE"
3923 "@
3924 unpcklps\t{%2, %0|%0, %2}
3925 movss\t{%1, %0|%0, %1}
3926 punpckldq\t{%2, %0|%0, %2}
3927 movd\t{%1, %0|%0, %1}"
3928 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3929 (set_attr "mode" "V4SF,SF,DI,DI")])
3930
;; AVX form of building a V4SF from two V2SF halves (operand 1 low,
;; operand 2 high).  Alternatives:
;;   0: operand 2 in a register, vmovlhps
;;   1: operand 2 in memory, vmovhps
3931 (define_insn "*vec_concatv4sf_avx"
3932 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3933 (vec_concat:V4SF
3934 (match_operand:V2SF 1 "register_operand" " x,x")
3935 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3936 "TARGET_AVX"
3937 "@
3938 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3939 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3940 [(set_attr "type" "ssemov")
3941 (set_attr "prefix" "vex")
3942 (set_attr "mode" "V4SF,V2SF")])
3943
;; Legacy SSE form of building a V4SF from two V2SF halves.  Operand 1
;; (the low half) is tied to the destination.  Alternatives:
;;   0: operand 2 in a register, movlhps
;;   1: operand 2 in memory, movhps
3944 (define_insn "*vec_concatv4sf_sse"
3945 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3946 (vec_concat:V4SF
3947 (match_operand:V2SF 1 "register_operand" " 0,0")
3948 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3949 "TARGET_SSE"
3950 "@
3951 movlhps\t{%2, %0|%0, %2}
3952 movhps\t{%2, %0|%0, %2}"
3953 [(set_attr "type" "ssemov")
3954 (set_attr "mode" "V4SF,V2SF")])
3955
;; Expander for the vec_init<mode> named pattern over every SSEMODE
;; vector mode.  All the work is delegated to ix86_expand_vector_init,
;; called with false as its first argument, operand 0 as the destination
;; and operand 1 (no predicate, no mode) as the initializer.
3956 (define_expand "vec_init<mode>"
3957 [(match_operand:SSEMODE 0 "register_operand" "")
3958 (match_operand 1 "" "")]
3959 "TARGET_SSE"
3960 {
3961 ix86_expand_vector_init (false, operands[0], operands[1]);
3962 DONE;
3963 })
3964
;; AVX form of setting element 0 of a four-element vector: the vec_merge
;; mask (const_int 1) takes element 0 from the duplicated scalar operand 2
;; and the other elements from operand 1.  Alternatives:
;;   0: operand 1 matches "C", scalar in an SSE register; vinsertps with
;;      immediate 0xe
;;   1: operand 1 matches "C", scalar in memory; scalar load
;;   2: operand 1 matches "C", scalar in a general register; vmovd
;;   3: vector and scalar both in SSE registers; vmovss
;;   4: scalar in a general register or memory; vpinsrd with immediate 0
;;   5: destination is memory equal to operand 1; emitted as "#" and left
;;      to a splitter
3965 (define_insn "*vec_set<mode>_0_avx"
3966 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3967 (vec_merge:SSEMODE4S
3968 (vec_duplicate:SSEMODE4S
3969 (match_operand:<ssescalarmode> 2
3970 "general_operand" " x,m,*r,x,*rm,x*rfF"))
3971 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0")
3972 (const_int 1)))]
3973 "TARGET_AVX"
3974 "@
3975 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
3976 vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3977 vmovd\t{%2, %0|%0, %2}
3978 vmovss\t{%2, %1, %0|%0, %1, %2}
3979 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3980 #"
3981 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
3982 (set_attr "prefix_extra" "*,*,*,*,1,*")
3983 (set_attr "length_immediate" "*,*,*,*,1,*")
3984 (set_attr "prefix" "vex")
3985 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
3986
;; SSE4.1 form of setting element 0 of a four-element vector: element 0
;; comes from the duplicated scalar operand 2, the rest from operand 1.
;; Alternatives:
;;   0: operand 1 matches "C", scalar in an SSE register; insertps with
;;      immediate 0xe
;;   1: operand 1 matches "C", scalar in memory; scalar load
;;   2: operand 1 matches "C", scalar in a general register; movd
;;   3: operand 1 tied to the destination, scalar in an SSE register; movss
;;   4: operand 1 tied to the destination, scalar in a general register or
;;      memory; pinsrd with immediate 0
;;   5: destination is memory equal to operand 1; emitted as "#" and left
;;      to a splitter
3987 (define_insn "*vec_set<mode>_0_sse4_1"
3988 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3989 (vec_merge:SSEMODE4S
3990 (vec_duplicate:SSEMODE4S
3991 (match_operand:<ssescalarmode> 2
3992 "general_operand" " x,m,*r,x,*rm,*rfF"))
3993 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0, 0,0")
3994 (const_int 1)))]
3995 "TARGET_SSE4_1"
3996 "@
3997 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
3998 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3999 movd\t{%2, %0|%0, %2}
4000 movss\t{%2, %0|%0, %2}
4001 pinsrd\t{$0, %2, %0|%0, %2, 0}
4002 #"
4003 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
4004 (set_attr "prefix_extra" "*,*,*,*,1,*")
4005 (set_attr "length_immediate" "*,*,*,*,1,*")
4006 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
4007
;; SSE2 form of setting element 0 of a four-element vector: element 0
;; comes from the duplicated scalar operand 2, the rest from operand 1.
;; Alternatives:
;;   0: operand 1 matches "C", scalar in memory; scalar load
;;   1: operand 1 matches "C", scalar in a general register; movd
;;   2: operand 1 tied to the destination, scalar in an SSE register; movss
;;   3: destination is memory equal to operand 1; emitted as "#" and left
;;      to a splitter
4008 (define_insn "*vec_set<mode>_0_sse2"
4009 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x, x,x,m")
4010 (vec_merge:SSEMODE4S
4011 (vec_duplicate:SSEMODE4S
4012 (match_operand:<ssescalarmode> 2
4013 "general_operand" " m,*r,x,x*rfF"))
4014 (match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0")
4015 (const_int 1)))]
4016 "TARGET_SSE2"
4017 "@
4018 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4019 movd\t{%2, %0|%0, %2}
4020 movss\t{%2, %0|%0, %2}
4021 #"
4022 [(set_attr "type" "ssemov")
4023 (set_attr "mode" "<ssescalarmode>,SI,SF,*")])
4024
;; Baseline SSE form of setting element 0 of a four-element vector:
;; element 0 comes from the duplicated scalar operand 2, the rest from
;; operand 1.  Alternatives:
;;   0: operand 1 matches "C", scalar in memory; movss load
;;   1: operand 1 tied to the destination, scalar in an SSE register; movss
;;   2: destination is memory equal to operand 1; emitted as "#" and left
;;      to a splitter
4025 (define_insn "vec_set<mode>_0"
4026 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x,m")
4027 (vec_merge:SSEMODE4S
4028 (vec_duplicate:SSEMODE4S
4029 (match_operand:<ssescalarmode> 2
4030 "general_operand" " m,x,x*rfF"))
4031 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,0,0")
4032 (const_int 1)))]
4033 "TARGET_SSE"
4034 "@
4035 movss\t{%2, %0|%0, %2}
4036 movss\t{%2, %0|%0, %2}
4037 #"
4038 [(set_attr "type" "ssemov")
4039 (set_attr "mode" "SF,SF,*")])
4040
4041 ;; A subset is vec_setv4sf.
;; AVX insertion of an SF scalar (operand 2) into one element of a V4SF
;; (operand 1).  Operand 3 is the vec_merge mask and must be a power of
;; two from 1 to 8, i.e. exactly one element is replaced.  The output
;; code turns the mask into the vinsertps immediate by taking its log2
;; (the element index) and shifting it left by 4, then stores the result
;; in operands[3] for %3.
4042 (define_insn "*vec_setv4sf_avx"
4043 [(set (match_operand:V4SF 0 "register_operand" "=x")
4044 (vec_merge:V4SF
4045 (vec_duplicate:V4SF
4046 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4047 (match_operand:V4SF 1 "register_operand" "x")
4048 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4049 "TARGET_AVX"
4050 {
4051 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4052 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4053 }
4054 [(set_attr "type" "sselog")
4055 (set_attr "prefix_extra" "1")
4056 (set_attr "length_immediate" "1")
4057 (set_attr "prefix" "vex")
4058 (set_attr "mode" "V4SF")])
4059
;; SSE4.1 insertion of an SF scalar (operand 2) into one element of a
;; V4SF (operand 1, tied to the destination).  Operand 3 is the vec_merge
;; mask and must be a power of two from 1 to 8.  The output code turns
;; the mask into the insertps immediate by taking its log2 (the element
;; index) and shifting it left by 4, then stores it in operands[3].
4060 (define_insn "*vec_setv4sf_sse4_1"
4061 [(set (match_operand:V4SF 0 "register_operand" "=x")
4062 (vec_merge:V4SF
4063 (vec_duplicate:V4SF
4064 (match_operand:SF 2 "nonimmediate_operand" "xm"))
4065 (match_operand:V4SF 1 "register_operand" "0")
4066 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4067 "TARGET_SSE4_1"
4068 {
4069 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4070 return "insertps\t{%3, %2, %0|%0, %2, %3}";
4071 }
4072 [(set_attr "type" "sselog")
4073 (set_attr "prefix_data16" "1")
4074 (set_attr "prefix_extra" "1")
4075 (set_attr "length_immediate" "1")
4076 (set_attr "mode" "V4SF")])
4077
;; AVX vinsertps with an arbitrary 8-bit immediate (operand 3).  The
;; operation is an UNSPEC_INSERTPS unspec over operand 2 (register or
;; memory), operand 1 (register) and the immediate, which is printed
;; unchanged.  The ";" after the template string starts a comment in this
;; file format.
4078 (define_insn "*avx_insertps"
4079 [(set (match_operand:V4SF 0 "register_operand" "=x")
4080 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
4081 (match_operand:V4SF 1 "register_operand" "x")
4082 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4083 UNSPEC_INSERTPS))]
4084 "TARGET_AVX"
4085 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4086 [(set_attr "type" "sselog")
4087 (set_attr "prefix" "vex")
4088 (set_attr "prefix_extra" "1")
4089 (set_attr "length_immediate" "1")
4090 (set_attr "mode" "V4SF")])
4091
;; SSE4.1 insertps with an arbitrary 8-bit immediate (operand 3).  The
;; operation is an UNSPEC_INSERTPS unspec; operand 1 is tied to the
;; destination and operand 2 must be a register.  The immediate is
;; printed unchanged.  The ";" after the template string starts a comment
;; in this file format.
4092 (define_insn "sse4_1_insertps"
4093 [(set (match_operand:V4SF 0 "register_operand" "=x")
4094 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
4095 (match_operand:V4SF 1 "register_operand" "0")
4096 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4097 UNSPEC_INSERTPS))]
4098 "TARGET_SSE4_1"
4099 "insertps\t{%3, %2, %0|%0, %2, %3}";
4100 [(set_attr "type" "sselog")
4101 (set_attr "prefix_data16" "1")
4102 (set_attr "prefix_extra" "1")
4103 (set_attr "length_immediate" "1")
4104 (set_attr "mode" "V4SF")])
4105
;; Splitter for setting element 0 of a vector that lives in memory
;; (operand 0 is both destination and the merged-in vector) from a
;; non-memory scalar.  After reload it becomes a plain scalar move to
;; offset 0 of the memory operand, re-addressed in the scalar mode.
4106 (define_split
4107 [(set (match_operand:SSEMODE4S 0 "memory_operand" "")
4108 (vec_merge:SSEMODE4S
4109 (vec_duplicate:SSEMODE4S
4110 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
4111 (match_dup 0)
4112 (const_int 1)))]
4113 "TARGET_SSE && reload_completed"
4114 [(const_int 0)]
4115 {
4116 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
4117 operands[1]);
4118 DONE;
4119 })
4120
;; Expander for the vec_set<mode> named pattern over every SSEMODE vector
;; mode.  Operand 0 is the vector, operand 1 the scalar and operand 2 a
;; constant integer; all the work is delegated to ix86_expand_vector_set,
;; called with false as its first argument and the integer value of
;; operand 2 as its last.
4121 (define_expand "vec_set<mode>"
4122 [(match_operand:SSEMODE 0 "register_operand" "")
4123 (match_operand:<ssescalarmode> 1 "register_operand" "")
4124 (match_operand 2 "const_int_operand" "")]
4125 "TARGET_SSE"
4126 {
4127 ix86_expand_vector_set (false, operands[0], operands[1],
4128 INTVAL (operands[2]));
4129 DONE;
4130 })
4131
;; Extraction of element 0 of a V4SF into an SF destination.  The insn
;; is rejected when both operands are memory, has no assembler output of
;; its own ("#") and is always split after reload into a plain SF move.
;; The source is reinterpreted in SFmode: a register keeps its register
;; number, anything else goes through gen_lowpart.
4132 (define_insn_and_split "*vec_extractv4sf_0"
4133 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4134 (vec_select:SF
4135 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4136 (parallel [(const_int 0)])))]
4137 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4138 "#"
4139 "&& reload_completed"
4140 [(const_int 0)]
4141 {
4142 rtx op1 = operands[1];
4143 if (REG_P (op1))
4144 op1 = gen_rtx_REG (SFmode, REGNO (op1));
4145 else
4146 op1 = gen_lowpart (SFmode, op1);
4147 emit_move_insn (operands[0], op1);
4148 DONE;
4149 })
4150
;; Expander for extracting one half of a 256-bit vector, for every
;; AVX256MODE mode.  Operand 2 is the constant 0 or 1 and picks which
;; generator is called: gen_vec_extract_lo_<mode> for 0,
;; gen_vec_extract_hi_<mode> for 1.  Any other value is treated as
;; unreachable.
4151 (define_expand "avx_vextractf128<mode>"
4152 [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
4153 (match_operand:AVX256MODE 1 "register_operand" "")
4154 (match_operand:SI 2 "const_0_to_1_operand" "")]
4155 "TARGET_AVX"
4156 {
4157 switch (INTVAL (operands[2]))
4158 {
4159 case 0:
4160 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
4161 break;
4162 case 1:
4163 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
4164 break;
4165 default:
4166 gcc_unreachable ();
4167 }
4168 DONE;
4169 })
4170
;; Extraction of the low half (elements 0 and 1) of an AVX256MODE4P
;; vector.  The insn has no assembler output of its own ("#") and is
;; split after reload into a plain move in the half-width mode.  The
;; source is reinterpreted in that mode: a register keeps its register
;; number, anything else goes through gen_lowpart.
4171 (define_insn_and_split "vec_extract_lo_<mode>"
4172 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4173 (vec_select:<avxhalfvecmode>
4174 (match_operand:AVX256MODE4P 1 "nonimmediate_operand" "xm,x")
4175 (parallel [(const_int 0) (const_int 1)])))]
4176 "TARGET_AVX"
4177 "#"
4178 "&& reload_completed"
4179 [(const_int 0)]
4180 {
4181 rtx op1 = operands[1];
4182 if (REG_P (op1))
4183 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
4184 else
4185 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
4186 emit_move_insn (operands[0], op1);
4187 DONE;
4188 })
4189
;; Extraction of the high half (elements 2 and 3) of an AVX256MODE4P
;; register with vextractf128 and immediate 1.  The destination is a
;; register (alternative 0) or memory (alternative 1); the "memory"
;; attribute is none/store accordingly.
4190 (define_insn "vec_extract_hi_<mode>"
4191 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4192 (vec_select:<avxhalfvecmode>
4193 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4194 (parallel [(const_int 2) (const_int 3)])))]
4195 "TARGET_AVX"
4196 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4197 [(set_attr "type" "sselog")
4198 (set_attr "prefix_extra" "1")
4199 (set_attr "length_immediate" "1")
4200 (set_attr "memory" "none,store")
4201 (set_attr "prefix" "vex")
4202 (set_attr "mode" "V8SF")])
4203
;; Extraction of the low half (elements 0..3) of an AVX256MODE8P vector.
;; The insn has no assembler output of its own ("#") and is split after
;; reload into a plain move in the half-width mode.  The source is
;; reinterpreted in that mode: a register keeps its register number,
;; anything else goes through gen_lowpart.
4204 (define_insn_and_split "vec_extract_lo_<mode>"
4205 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4206 (vec_select:<avxhalfvecmode>
4207 (match_operand:AVX256MODE8P 1 "nonimmediate_operand" "xm,x")
4208 (parallel [(const_int 0) (const_int 1)
4209 (const_int 2) (const_int 3)])))]
4210 "TARGET_AVX"
4211 "#"
4212 "&& reload_completed"
4213 [(const_int 0)]
4214 {
4215 rtx op1 = operands[1];
4216 if (REG_P (op1))
4217 op1 = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (op1));
4218 else
4219 op1 = gen_lowpart (<avxhalfvecmode>mode, op1);
4220 emit_move_insn (operands[0], op1);
4221 DONE;
4222 })
4223
;; Extraction of the high half (elements 4..7) of an AVX256MODE8P
;; register with vextractf128 and immediate 1.  The destination is a
;; register (alternative 0) or memory (alternative 1); the "memory"
;; attribute is none/store accordingly.
4224 (define_insn "vec_extract_hi_<mode>"
4225 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4226 (vec_select:<avxhalfvecmode>
4227 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4228 (parallel [(const_int 4) (const_int 5)
4229 (const_int 6) (const_int 7)])))]
4230 "TARGET_AVX"
4231 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4232 [(set_attr "type" "sselog")
4233 (set_attr "prefix_extra" "1")
4234 (set_attr "length_immediate" "1")
4235 (set_attr "memory" "none,store")
4236 (set_attr "prefix" "vex")
4237 (set_attr "mode" "V8SF")])
4238
;; Extraction of the low half (elements 0..7) of a V16HI vector into a
;; V8HI.  The insn has no assembler output of its own ("#") and is split
;; after reload into a plain V8HI move.  The source is reinterpreted in
;; V8HImode: a register keeps its register number, anything else goes
;; through gen_lowpart.
4239 (define_insn_and_split "vec_extract_lo_v16hi"
4240 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4241 (vec_select:V8HI
4242 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4243 (parallel [(const_int 0) (const_int 1)
4244 (const_int 2) (const_int 3)
4245 (const_int 4) (const_int 5)
4246 (const_int 6) (const_int 7)])))]
4247 "TARGET_AVX"
4248 "#"
4249 "&& reload_completed"
4250 [(const_int 0)]
4251 {
4252 rtx op1 = operands[1];
4253 if (REG_P (op1))
4254 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4255 else
4256 op1 = gen_lowpart (V8HImode, op1);
4257 emit_move_insn (operands[0], op1);
4258 DONE;
4259 })
4260
;; Extraction of the high half (elements 8..15) of a V16HI register into
;; a V8HI with vextractf128 and immediate 1.  The destination is a
;; register (alternative 0) or memory (alternative 1).
4261 (define_insn "vec_extract_hi_v16hi"
4262 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4263 (vec_select:V8HI
4264 (match_operand:V16HI 1 "register_operand" "x,x")
4265 (parallel [(const_int 8) (const_int 9)
4266 (const_int 10) (const_int 11)
4267 (const_int 12) (const_int 13)
4268 (const_int 14) (const_int 15)])))]
4269 "TARGET_AVX"
4270 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4271 [(set_attr "type" "sselog")
4272 (set_attr "prefix_extra" "1")
4273 (set_attr "length_immediate" "1")
4274 (set_attr "memory" "none,store")
4275 (set_attr "prefix" "vex")
4276 (set_attr "mode" "V8SF")])
4277
;; Extraction of the low half (elements 0..15) of a V32QI vector into a
;; V16QI.  The insn has no assembler output of its own ("#") and is split
;; after reload into a plain V16QI move.  The source is reinterpreted in
;; V16QImode: a register keeps its register number, anything else goes
;; through gen_lowpart.
4278 (define_insn_and_split "vec_extract_lo_v32qi"
4279 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4280 (vec_select:V16QI
4281 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4282 (parallel [(const_int 0) (const_int 1)
4283 (const_int 2) (const_int 3)
4284 (const_int 4) (const_int 5)
4285 (const_int 6) (const_int 7)
4286 (const_int 8) (const_int 9)
4287 (const_int 10) (const_int 11)
4288 (const_int 12) (const_int 13)
4289 (const_int 14) (const_int 15)])))]
4290 "TARGET_AVX"
4291 "#"
4292 "&& reload_completed"
4293 [(const_int 0)]
4294 {
4295 rtx op1 = operands[1];
4296 if (REG_P (op1))
4297 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4298 else
4299 op1 = gen_lowpart (V16QImode, op1);
4300 emit_move_insn (operands[0], op1);
4301 DONE;
4302 })
4303
;; Extraction of the high half (elements 16..31) of a V32QI register into
;; a V16QI with vextractf128 and immediate 1.  The destination is a
;; register (alternative 0) or memory (alternative 1).
4304 (define_insn "vec_extract_hi_v32qi"
4305 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4306 (vec_select:V16QI
4307 (match_operand:V32QI 1 "register_operand" "x,x")
4308 (parallel [(const_int 16) (const_int 17)
4309 (const_int 18) (const_int 19)
4310 (const_int 20) (const_int 21)
4311 (const_int 22) (const_int 23)
4312 (const_int 24) (const_int 25)
4313 (const_int 26) (const_int 27)
4314 (const_int 28) (const_int 29)
4315 (const_int 30) (const_int 31)])))]
4316 "TARGET_AVX"
4317 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4318 [(set_attr "type" "sselog")
4319 (set_attr "prefix_extra" "1")
4320 (set_attr "length_immediate" "1")
4321 (set_attr "memory" "none,store")
4322 (set_attr "prefix" "vex")
4323 (set_attr "mode" "V8SF")])
4324
;; SSE4.1 extractps: copies the element of V4SF register operand 1 chosen
;; by the constant operand 2 (0..3) into an SF destination that is a
;; general register or memory ("rm").  The "prefix" attribute is
;; maybe_vex and the template starts with %v.
4325 (define_insn "*sse4_1_extractps"
4326 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4327 (vec_select:SF
4328 (match_operand:V4SF 1 "register_operand" "x")
4329 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4330 "TARGET_SSE4_1"
4331 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4332 [(set_attr "type" "sselog")
4333 (set_attr "prefix_data16" "1")
4334 (set_attr "prefix_extra" "1")
4335 (set_attr "length_immediate" "1")
4336 (set_attr "prefix" "maybe_vex")
4337 (set_attr "mode" "V4SF")])
4338
;; Extraction of element 0..3 (constant operand 2) from a V4SF that lives
;; in offsettable memory.  The insn condition is empty, there is no
;; assembler output of its own ("#"), and after reload it is split into
;; an SF load from the memory operand at byte offset 4 * index.
4339 (define_insn_and_split "*vec_extract_v4sf_mem"
4340 [(set (match_operand:SF 0 "register_operand" "=x*rf")
4341 (vec_select:SF
4342 (match_operand:V4SF 1 "memory_operand" "o")
4343 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4344 ""
4345 "#"
4346 "reload_completed"
4347 [(const_int 0)]
4348 {
4349 int i = INTVAL (operands[2]);
4350
4351 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
4352 DONE;
4353 })
4354
;; Expander for the vec_extract<mode> named pattern over every SSEMODE
;; vector mode.  Operand 0 is the scalar destination, operand 1 the
;; vector and operand 2 a constant integer; all the work is delegated to
;; ix86_expand_vector_extract, called with false as its first argument
;; and the integer value of operand 2 as its last.
4355 (define_expand "vec_extract<mode>"
4356 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4357 (match_operand:SSEMODE 1 "register_operand" "")
4358 (match_operand 2 "const_int_operand" "")]
4359 "TARGET_SSE"
4360 {
4361 ix86_expand_vector_extract (false, operands[0], operands[1],
4362 INTVAL (operands[2]));
4363 DONE;
4364 })
4365
4366 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4367 ;;
4368 ;; Parallel double-precision floating point element swizzling
4369 ;;
4370 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4371
4372 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd.  From vec_concat (operand 1, operand 2) the result
;; takes indices 1,5,3,7: the odd element of each two-element half of
;; operand 1 paired with the element at the same position in operand 2
;; (operand 2 starts at index 4).
4373 (define_insn "avx_unpckhpd256"
4374 [(set (match_operand:V4DF 0 "register_operand" "=x")
4375 (vec_select:V4DF
4376 (vec_concat:V8DF
4377 (match_operand:V4DF 1 "register_operand" "x")
4378 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4379 (parallel [(const_int 1) (const_int 5)
4380 (const_int 3) (const_int 7)])))]
4381 "TARGET_AVX"
4382 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4383 [(set_attr "type" "sselog")
4384 (set_attr "prefix" "vex")
4385 (set_attr "mode" "V4DF")])
4386
;; Expander for interleaving the high elements of two V2DF vectors: the
;; result is element 1 of operand 1 followed by element 1 of operand 2
;; (index 3 of the concatenation).  If
;; ix86_vec_interleave_v2df_operator_ok rejects the operand combination,
;; operand 2 is forced into a register before the pattern is emitted.
4387 (define_expand "vec_interleave_highv2df"
4388 [(set (match_operand:V2DF 0 "register_operand" "")
4389 (vec_select:V2DF
4390 (vec_concat:V4DF
4391 (match_operand:V2DF 1 "nonimmediate_operand" "")
4392 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4393 (parallel [(const_int 1)
4394 (const_int 3)])))]
4395 "TARGET_SSE2"
4396 {
4397 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4398 operands[2] = force_reg (V2DFmode, operands[2]);
4399 })
4400
;; AVX form of the V2DF high interleave (indices 1 and 3 of the
;; concatenation).  The insn is only valid when
;; ix86_vec_interleave_v2df_operator_ok accepts the operands.
;; Alternatives:
;;   0: both inputs in registers, vunpckhpd
;;   1: operand 1 in offsettable memory and operand 2 identical to it
;;      ("1"), vmovddup from %H1
;;   2: operand 1 in offsettable memory, operand 2 in a register,
;;      vmovlpd from %H1
;;   3: destination is memory equal to operand 2 ("0"), operand 1 in a
;;      register, vmovhpd store
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- the modifier is defined outside this chunk.
4401 (define_insn "*avx_interleave_highv2df"
4402 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4403 (vec_select:V2DF
4404 (vec_concat:V4DF
4405 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,o,x")
4406 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,x,0"))
4407 (parallel [(const_int 1)
4408 (const_int 3)])))]
4409 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4410 "@
4411 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4412 vmovddup\t{%H1, %0|%0, %H1}
4413 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4414 vmovhpd\t{%1, %0|%0, %1}"
4415 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4416 (set_attr "prefix" "vex")
4417 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
4418
;; SSE3 form of the V2DF high interleave (indices 1 and 3 of the
;; concatenation).  The insn is only valid when
;; ix86_vec_interleave_v2df_operator_ok accepts the operands.
;; Alternatives:
;;   0: operand 1 tied to the destination, operand 2 in a register,
;;      unpckhpd
;;   1: operand 1 in offsettable memory and operand 2 identical to it,
;;      movddup from %H1
;;   2: operand 1 in offsettable memory, operand 2 tied to the
;;      destination, movlpd from %H1
;;   3: destination is memory equal to operand 2, operand 1 in a
;;      register, movhpd store
4419 (define_insn "*sse3_interleave_highv2df"
4420 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4421 (vec_select:V2DF
4422 (vec_concat:V4DF
4423 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x")
4424 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0"))
4425 (parallel [(const_int 1)
4426 (const_int 3)])))]
4427 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4428 "@
4429 unpckhpd\t{%2, %0|%0, %2}
4430 movddup\t{%H1, %0|%0, %H1}
4431 movlpd\t{%H1, %0|%0, %H1}
4432 movhpd\t{%1, %0|%0, %1}"
4433 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4434 (set_attr "prefix_data16" "*,*,1,1")
4435 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
4436
;; SSE2 form of the V2DF high interleave (indices 1 and 3 of the
;; concatenation); it has no movddup alternative.  The insn is only valid
;; when ix86_vec_interleave_v2df_operator_ok accepts the operands.
;; Alternatives:
;;   0: operand 1 tied to the destination, operand 2 in a register,
;;      unpckhpd
;;   1: operand 1 in offsettable memory, operand 2 tied to the
;;      destination, movlpd from %H1
;;   2: destination is memory equal to operand 2, operand 1 in a
;;      register, movhpd store
4437 (define_insn "*sse2_interleave_highv2df"
4438 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4439 (vec_select:V2DF
4440 (vec_concat:V4DF
4441 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4442 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4443 (parallel [(const_int 1)
4444 (const_int 3)])))]
4445 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4446 "@
4447 unpckhpd\t{%2, %0|%0, %2}
4448 movlpd\t{%H1, %0|%0, %H1}
4449 movhpd\t{%1, %0|%0, %1}"
4450 [(set_attr "type" "sselog,ssemov,ssemov")
4451 (set_attr "prefix_data16" "*,1,1")
4452 (set_attr "mode" "V2DF,V1DF,V1DF")])
4453
4454 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander for the 256-bit movddup.  It only emits the RTL shown:
;; operand 1 concatenated with itself, selecting indices 0,4,2,6, i.e.
;; elements 0,0,2,2 of operand 1.  There is no preparation code.
4455 (define_expand "avx_movddup256"
4456 [(set (match_operand:V4DF 0 "register_operand" "")
4457 (vec_select:V4DF
4458 (vec_concat:V8DF
4459 (match_operand:V4DF 1 "nonimmediate_operand" "")
4460 (match_dup 1))
4461 (parallel [(const_int 0) (const_int 4)
4462 (const_int 2) (const_int 6)])))]
4463 "TARGET_AVX")
4464
;; Expander for the 256-bit unpcklpd.  It only emits the RTL shown:
;; indices 0,4,2,6 of vec_concat (operand 1, operand 2), i.e. the even
;; element of each two-element half of operand 1 paired with the element
;; at the same position in operand 2.  There is no preparation code.
4465 (define_expand "avx_unpcklpd256"
4466 [(set (match_operand:V4DF 0 "register_operand" "")
4467 (vec_select:V4DF
4468 (vec_concat:V8DF
4469 (match_operand:V4DF 1 "register_operand" "")
4470 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4471 (parallel [(const_int 0) (const_int 4)
4472 (const_int 2) (const_int 6)])))]
4473 "TARGET_AVX")
4474
;; Insn matching the RTL produced by both avx_movddup256 and
;; avx_unpcklpd256 (indices 0,4,2,6 of the concatenation).  The condition
;; allows operand 1 to be memory only when operand 2 is the same rtx.
;; Alternatives:
;;   0: operand 2 identical to operand 1 ("1"), vmovddup from register or
;;      memory
;;   1: operand 1 in a register, operand 2 in register or memory,
;;      vunpcklpd
4475 (define_insn "*avx_unpcklpd256"
4476 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4477 (vec_select:V4DF
4478 (vec_concat:V8DF
4479 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4480 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4481 (parallel [(const_int 0) (const_int 4)
4482 (const_int 2) (const_int 6)])))]
4483 "TARGET_AVX
4484 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4485 "@
4486 vmovddup\t{%1, %0|%0, %1}
4487 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4488 [(set_attr "type" "sselog")
4489 (set_attr "prefix" "vex")
4490 (set_attr "mode" "V4DF")])
4491
;; Expander for the V2DF low interleave: elements 0 and 2 of the
;; concatenation of operands 1 and 2.  When
;; ix86_vec_interleave_v2df_operator_ok rejects the operands, operand 1
;; is forced into a register before the pattern is emitted.
4492 (define_expand "vec_interleave_lowv2df"
4493 [(set (match_operand:V2DF 0 "register_operand" "")
4494 (vec_select:V2DF
4495 (vec_concat:V4DF
4496 (match_operand:V2DF 1 "nonimmediate_operand" "")
4497 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4498 (parallel [(const_int 0)
4499 (const_int 2)])))]
4500 "TARGET_SSE2"
4501 {
4502 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4503 operands[1] = force_reg (V2DFmode, operands[1]);
4504 })
4505
;; AVX form of the V2DF low interleave (elements 0 and 2 of the
;; concatenation).  Alternative 1 requires operand 2 to match a memory
;; operand 1 and emits vmovddup; alternative 2 loads operand 2 from
;; memory with vmovhpd; alternative 3 stores operand 2 into the high
;; half (%H0) of an offsettable memory destination tied to operand 1.
4506 (define_insn "*avx_interleave_lowv2df"
4507 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4508 (vec_select:V2DF
4509 (vec_concat:V4DF
4510 (match_operand:V2DF 1 "nonimmediate_operand" " x,m,x,0")
4511 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4512 (parallel [(const_int 0)
4513 (const_int 2)])))]
4514 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4515 "@
4516 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4517 vmovddup\t{%1, %0|%0, %1}
4518 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4519 vmovlpd\t{%2, %H0|%H0, %2}"
4520 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4521 (set_attr "prefix" "vex")
4522 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
4523
;; SSE3 form of the V2DF low interleave.  Operand 1 is tied to the
;; destination in every alternative except alternative 1, where operand 2
;; must match a memory operand 1 and movddup is emitted.
4524 (define_insn "*sse3_interleave_lowv2df"
4525 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4526 (vec_select:V2DF
4527 (vec_concat:V4DF
4528 (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0")
4529 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4530 (parallel [(const_int 0)
4531 (const_int 2)])))]
4532 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4533 "@
4534 unpcklpd\t{%2, %0|%0, %2}
4535 movddup\t{%1, %0|%0, %1}
4536 movhpd\t{%2, %0|%0, %2}
4537 movlpd\t{%2, %H0|%H0, %2}"
4538 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4539 (set_attr "prefix_data16" "*,*,1,1")
4540 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
4541
;; SSE2 form of the V2DF low interleave.  Operand 1 is always tied to
;; the destination; operand 2 comes from a register (unpcklpd), from
;; memory (movhpd), or is stored into the high half (%H0) of a memory
;; destination (movlpd).
4542 (define_insn "*sse2_interleave_lowv2df"
4543 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4544 (vec_select:V2DF
4545 (vec_concat:V4DF
4546 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4547 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4548 (parallel [(const_int 0)
4549 (const_int 2)])))]
4550 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4551 "@
4552 unpcklpd\t{%2, %0|%0, %2}
4553 movhpd\t{%2, %0|%0, %2}
4554 movlpd\t{%2, %H0|%H0, %2}"
4555 [(set_attr "type" "sselog,ssemov,ssemov")
4556 (set_attr "prefix_data16" "*,1,1")
4557 (set_attr "mode" "V2DF,V1DF,V1DF")])
4558
;; After reload on SSE3 targets, a store to memory of element 0 of a
;; register duplicated into both result elements is split into two
;; DFmode moves of the register's low part, to byte offsets 0 and 8 of
;; the destination.
4559 (define_split
4560 [(set (match_operand:V2DF 0 "memory_operand" "")
4561 (vec_select:V2DF
4562 (vec_concat:V4DF
4563 (match_operand:V2DF 1 "register_operand" "")
4564 (match_dup 1))
4565 (parallel [(const_int 0)
4566 (const_int 2)])))]
4567 "TARGET_SSE3 && reload_completed"
4568 [(const_int 0)]
4569 {
4570 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4571 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4572 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
4573 DONE;
4574 })
4575
;; On SSE3 targets, selecting elements i and i+2 (i is 0 or 1) from a
;; memory operand concatenated with itself picks the same element twice.
;; Rewrite it as a vec_duplicate of the DFmode value at byte offset i*8
;; of that memory operand.
4576 (define_split
4577 [(set (match_operand:V2DF 0 "register_operand" "")
4578 (vec_select:V2DF
4579 (vec_concat:V4DF
4580 (match_operand:V2DF 1 "memory_operand" "")
4581 (match_dup 1))
4582 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4583 (match_operand:SI 3 "const_int_operand" "")])))]
4584 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4585 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4586 {
4587 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
4588 })
4589
;; Expander taking the shuffle immediate as operand 3 and converting it
;; into the four explicit element indices expected by avx_shufpd256_1:
;; bit 0 picks index 0 or 1, bit 1 picks 4 or 5, bit 2 picks 2 or 3 and
;; bit 3 picks 6 or 7 of the V8DF concatenation of operands 1 and 2.
4590 (define_expand "avx_shufpd256"
4591 [(match_operand:V4DF 0 "register_operand" "")
4592 (match_operand:V4DF 1 "register_operand" "")
4593 (match_operand:V4DF 2 "nonimmediate_operand" "")
4594 (match_operand:SI 3 "const_int_operand" "")]
4595 "TARGET_AVX"
4596 {
4597 int mask = INTVAL (operands[3]);
4598 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4599 GEN_INT (mask & 1),
4600 GEN_INT (mask & 2 ? 5 : 4),
4601 GEN_INT (mask & 4 ? 3 : 2),
4602 GEN_INT (mask & 8 ? 7 : 6)));
4603 DONE;
4604 })
4605
;; 256-bit shufpd expressed as a vec_select with explicit indices in
;; operands 3..6.  The output code subtracts each index's base (0, 4, 2,
;; 6) and packs the results into bits 0..3 of the immediate, which
;; replaces operand 3 before vshufpd is printed.
4606 (define_insn "avx_shufpd256_1"
4607 [(set (match_operand:V4DF 0 "register_operand" "=x")
4608 (vec_select:V4DF
4609 (vec_concat:V8DF
4610 (match_operand:V4DF 1 "register_operand" "x")
4611 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4612 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4613 (match_operand 4 "const_4_to_5_operand" "")
4614 (match_operand 5 "const_2_to_3_operand" "")
4615 (match_operand 6 "const_6_to_7_operand" "")])))]
4616 "TARGET_AVX"
4617 {
4618 int mask;
4619 mask = INTVAL (operands[3]);
4620 mask |= (INTVAL (operands[4]) - 4) << 1;
4621 mask |= (INTVAL (operands[5]) - 2) << 2;
4622 mask |= (INTVAL (operands[6]) - 6) << 3;
4623 operands[3] = GEN_INT (mask);
4624
4625 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4626 }
4627 [(set_attr "type" "sselog")
4628 (set_attr "length_immediate" "1")
4629 (set_attr "prefix" "vex")
4630 (set_attr "mode" "V4DF")])
4631
;; Expander taking the shufpd immediate as operand 3 and converting it
;; into two explicit element indices for gen_sse2_shufpd_v2df: bit 0
;; picks index 0 or 1, bit 1 picks index 2 or 3.
4632 (define_expand "sse2_shufpd"
4633 [(match_operand:V2DF 0 "register_operand" "")
4634 (match_operand:V2DF 1 "register_operand" "")
4635 (match_operand:V2DF 2 "nonimmediate_operand" "")
4636 (match_operand:SI 3 "const_int_operand" "")]
4637 "TARGET_SSE2"
4638 {
4639 int mask = INTVAL (operands[3]);
4640 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4641 GEN_INT (mask & 1),
4642 GEN_INT (mask & 2 ? 3 : 2)));
4643 DONE;
4644 })
4645
;; Expander for every mode in SSEMODE_EO; has an empty enabling
;; condition.  All work is delegated to ix86_expand_vec_extract_even_odd
;; with a final argument of 0.
4646 (define_expand "vec_extract_even<mode>"
4647 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4648 (match_operand:SSEMODE_EO 1 "register_operand" "")
4649 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4650 ""
4651 {
4652 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
4653 DONE;
4654 })
4655
;; Expander for every mode in SSEMODE_EO; has an empty enabling
;; condition.  All work is delegated to ix86_expand_vec_extract_even_odd
;; with a final argument of 1.
4656 (define_expand "vec_extract_odd<mode>"
4657 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4658 (match_operand:SSEMODE_EO 1 "register_operand" "")
4659 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4660 ""
4661 {
4662 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4663 DONE;
4664 })
4665
4666 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX form: selects elements 1 and 3 of the V4DI concatenation, i.e.
;; element 1 of operand 1 followed by element 1 of operand 2, using the
;; three-operand vpunpckhqdq.
4667 (define_insn "*avx_interleave_highv2di"
4668 [(set (match_operand:V2DI 0 "register_operand" "=x")
4669 (vec_select:V2DI
4670 (vec_concat:V4DI
4671 (match_operand:V2DI 1 "register_operand" "x")
4672 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4673 (parallel [(const_int 1)
4674 (const_int 3)])))]
4675 "TARGET_AVX"
4676 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4677 [(set_attr "type" "sselog")
4678 (set_attr "prefix" "vex")
4679 (set_attr "mode" "TI")])
4680
;; SSE2 form of the V2DI high interleave (elements 1 and 3 of the
;; concatenation).  Operand 1 is tied to the destination (constraint
;; "0") because punpckhqdq is a two-operand instruction.
4681 (define_insn "vec_interleave_highv2di"
4682 [(set (match_operand:V2DI 0 "register_operand" "=x")
4683 (vec_select:V2DI
4684 (vec_concat:V4DI
4685 (match_operand:V2DI 1 "register_operand" "0")
4686 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4687 (parallel [(const_int 1)
4688 (const_int 3)])))]
4689 "TARGET_SSE2"
4690 "punpckhqdq\t{%2, %0|%0, %2}"
4691 [(set_attr "type" "sselog")
4692 (set_attr "prefix_data16" "1")
4693 (set_attr "mode" "TI")])
4694
;; AVX form: selects elements 0 and 2 of the V4DI concatenation, i.e.
;; element 0 of operand 1 followed by element 0 of operand 2, using the
;; three-operand vpunpcklqdq.
4695 (define_insn "*avx_interleave_lowv2di"
4696 [(set (match_operand:V2DI 0 "register_operand" "=x")
4697 (vec_select:V2DI
4698 (vec_concat:V4DI
4699 (match_operand:V2DI 1 "register_operand" "x")
4700 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4701 (parallel [(const_int 0)
4702 (const_int 2)])))]
4703 "TARGET_AVX"
4704 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4705 [(set_attr "type" "sselog")
4706 (set_attr "prefix" "vex")
4707 (set_attr "mode" "TI")])
4708
;; SSE2 form of the V2DI low interleave (elements 0 and 2 of the
;; concatenation).  Operand 1 is tied to the destination (constraint
;; "0") because punpcklqdq is a two-operand instruction.
4709 (define_insn "vec_interleave_lowv2di"
4710 [(set (match_operand:V2DI 0 "register_operand" "=x")
4711 (vec_select:V2DI
4712 (vec_concat:V4DI
4713 (match_operand:V2DI 1 "register_operand" "0")
4714 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4715 (parallel [(const_int 0)
4716 (const_int 2)])))]
4717 "TARGET_SSE2"
4718 "punpcklqdq\t{%2, %0|%0, %2}"
4719 [(set_attr "type" "sselog")
4720 (set_attr "prefix_data16" "1")
4721 (set_attr "mode" "TI")])
4722
;; AVX shufpd for the modes in SSEMODE2D.  Operands 3 and 4 are element
;; indices into the concatenation of operands 1 and 2 (0..1 and 2..3 per
;; their predicates).  The output code turns them back into the 2-bit
;; immediate, stored in operand 3, before printing vshufpd.
4723 (define_insn "*avx_shufpd_<mode>"
4724 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4725 (vec_select:SSEMODE2D
4726 (vec_concat:<ssedoublesizemode>
4727 (match_operand:SSEMODE2D 1 "register_operand" "x")
4728 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4729 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4730 (match_operand 4 "const_2_to_3_operand" "")])))]
4731 "TARGET_AVX"
4732 {
4733 int mask;
4734 mask = INTVAL (operands[3]);
4735 mask |= (INTVAL (operands[4]) - 2) << 1;
4736 operands[3] = GEN_INT (mask);
4737
4738 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4739 }
4740 [(set_attr "type" "sselog")
4741 (set_attr "length_immediate" "1")
4742 (set_attr "prefix" "vex")
4743 (set_attr "mode" "V2DF")])
4744
;; SSE2 shufpd for the modes in SSEMODE2D; operand 1 is tied to the
;; destination.  Operands 3 and 4 are element indices (0..1 and 2..3 per
;; their predicates); the output code folds them into the 2-bit
;; immediate, stored in operand 3, before printing shufpd.
4745 (define_insn "sse2_shufpd_<mode>"
4746 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4747 (vec_select:SSEMODE2D
4748 (vec_concat:<ssedoublesizemode>
4749 (match_operand:SSEMODE2D 1 "register_operand" "0")
4750 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4751 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4752 (match_operand 4 "const_2_to_3_operand" "")])))]
4753 "TARGET_SSE2"
4754 {
4755 int mask;
4756 mask = INTVAL (operands[3]);
4757 mask |= (INTVAL (operands[4]) - 2) << 1;
4758 operands[3] = GEN_INT (mask);
4759
4760 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4761 }
4762 [(set_attr "type" "sselog")
4763 (set_attr "length_immediate" "1")
4764 (set_attr "mode" "V2DF")])
4765
4766 ;; Avoid combining registers from different units in a single alternative,
4767 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: extract element 1 of V2DF operand 1 as a DF value.  A store to
;; memory uses vmovhpd and a register-to-register extract uses
;; vunpckhpd; the three alternatives with a memory source output "#",
;; so no instruction is printed for them from this pattern.
4768 (define_insn "*avx_storehpd"
4769 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4770 (vec_select:DF
4771 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4772 (parallel [(const_int 1)])))]
4773 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4774 "@
4775 vmovhpd\t{%1, %0|%0, %1}
4776 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4777 #
4778 #
4779 #"
4780 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4781 (set_attr "prefix" "vex")
4782 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4783
;; SSE2: extract element 1 of V2DF operand 1 as a DF value.  The
;; register-to-register alternative ties operand 1 to the destination
;; and emits unpckhpd %0, %0; the three alternatives with a memory
;; source output "#".
4784 (define_insn "sse2_storehpd"
4785 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4786 (vec_select:DF
4787 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4788 (parallel [(const_int 1)])))]
4789 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4790 "@
4791 movhpd\t{%1, %0|%0, %1}
4792 unpckhpd\t%0, %0
4793 #
4794 #
4795 #"
4796 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4797 (set_attr "prefix_data16" "1,*,*,*,*")
4798 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4799
;; After reload, extracting element 1 of a V2DF in memory into a
;; register becomes a plain DFmode move from byte offset 8 of that
;; memory operand.
4800 (define_split
4801 [(set (match_operand:DF 0 "register_operand" "")
4802 (vec_select:DF
4803 (match_operand:V2DF 1 "memory_operand" "")
4804 (parallel [(const_int 1)])))]
4805 "TARGET_SSE2 && reload_completed"
4806 [(set (match_dup 0) (match_dup 1))]
4807 "operands[1] = adjust_address (operands[1], DFmode, 8);")
4808
4809 ;; Avoid combining registers from different units in a single alternative,
4810 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 of V2DF operand 1 as a DF value.  Only the store
;; to memory prints an instruction (%vmovlpd, with "maybe_vex" prefix);
;; the remaining four alternatives output "#".
4811 (define_insn "sse2_storelpd"
4812 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4813 (vec_select:DF
4814 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4815 (parallel [(const_int 0)])))]
4816 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4817 "@
4818 %vmovlpd\t{%1, %0|%0, %1}
4819 #
4820 #
4821 #
4822 #"
4823 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4824 (set_attr "prefix_data16" "1,*,*,*,*")
4825 (set_attr "prefix" "maybe_vex")
4826 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
4827
;; After reload, extracting element 0 of a V2DF into a register becomes
;; a DFmode move.  A register source is rewritten as a DFmode register
;; with the same register number; any other source goes through
;; gen_lowpart.
4828 (define_split
4829 [(set (match_operand:DF 0 "register_operand" "")
4830 (vec_select:DF
4831 (match_operand:V2DF 1 "nonimmediate_operand" "")
4832 (parallel [(const_int 0)])))]
4833 "TARGET_SSE2 && reload_completed"
4834 [(const_int 0)]
4835 {
4836 rtx op1 = operands[1];
4837 if (REG_P (op1))
4838 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4839 else
4840 op1 = gen_lowpart (DFmode, op1);
4841 emit_move_insn (operands[0], op1);
4842 DONE;
4843 })
4844
;; Expander building a V2DF from element 0 of operand 1 and the DF
;; operand 2 (operand 2 becomes the high element).  The operands are
;; passed through ix86_fixup_binary_operands before the pattern is
;; emitted.
4845 (define_expand "sse2_loadhpd_exp"
4846 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4847 (vec_concat:V2DF
4848 (vec_select:DF
4849 (match_operand:V2DF 1 "nonimmediate_operand" "")
4850 (parallel [(const_int 0)]))
4851 (match_operand:DF 2 "nonimmediate_operand" "")))]
4852 "TARGET_SSE2"
4853 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4854
4855 ;; Avoid combining registers from different units in a single alternative,
4856 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: result = { element 0 of operand 1, operand 2 }.  Operand 2 is
;; loaded from memory with vmovhpd or taken from a register with
;; vunpcklpd.  The three alternatives whose destination is memory tied
;; to operand 1 output "#".
4857 (define_insn "*avx_loadhpd"
4858 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4859 (vec_concat:V2DF
4860 (vec_select:DF
4861 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4862 (parallel [(const_int 0)]))
4863 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4864 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4865 "@
4866 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4867 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4868 #
4869 #
4870 #"
4871 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4872 (set_attr "prefix" "vex")
4873 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4874
;; SSE2: result = { element 0 of operand 1, operand 2 }.  Operand 1 is
;; tied to the destination in every alternative.  Operand 2 is loaded
;; from memory with movhpd or taken from a register with unpcklpd; the
;; three alternatives whose destination is memory output "#".
;;
;; There used to be a sixth alternative (op0 "=x", op1 "x", op2 "0")
;; printing "shufpd $1, %1, %0".  With the DF operand 2 living in the
;; low half of %0, that instruction yields { %0[1], %1[0] }, not the
;; required { %1[0], %0[0] }.  shufpd always takes the low result
;; element from the destination register and the high one from the
;; source, so no immediate can produce the required value with that
;; register assignment; the alternative has therefore been removed.
4875 (define_insn "sse2_loadhpd"
4876 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4877 (vec_concat:V2DF
4878 (vec_select:DF
4879 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,0,0")
4880 (parallel [(const_int 0)]))
4881 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4882 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4883 "@
4884 movhpd\t{%2, %0|%0, %2}
4885 unpcklpd\t{%2, %0|%0, %2}
4887 #
4888 #
4889 #"
4890 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4891 (set_attr "prefix_data16" "1,*,*,*,*")
4893 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4894
;; After reload, replacing the high element of a V2DF in memory with a
;; register (the low element is the memory's own element 0) becomes a
;; DFmode store of that register to byte offset 8.
4895 (define_split
4896 [(set (match_operand:V2DF 0 "memory_operand" "")
4897 (vec_concat:V2DF
4898 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4899 (match_operand:DF 1 "register_operand" "")))]
4900 "TARGET_SSE2 && reload_completed"
4901 [(set (match_dup 0) (match_dup 1))]
4902 "operands[0] = adjust_address (operands[0], DFmode, 8);")
4903
;; Expander building a V2DF from the DF operand 2 (low element) and
;; element 1 of operand 1 (high element).  The operands are passed
;; through ix86_fixup_binary_operands before the pattern is emitted.
4904 (define_expand "sse2_loadlpd_exp"
4905 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4906 (vec_concat:V2DF
4907 (match_operand:DF 2 "nonimmediate_operand" "")
4908 (vec_select:DF
4909 (match_operand:V2DF 1 "nonimmediate_operand" "")
4910 (parallel [(const_int 1)]))))]
4911 "TARGET_SSE2"
4912 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4913
4914 ;; Avoid combining registers from different units in a single alternative,
4915 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: result = { operand 2, element 1 of operand 1 }.  Alternative 0
;; takes operand 1 through constraint "C" and is a plain vmovsd load
;; (NOTE(review): "C" is presumably the all-zero vector -- confirm in
;; constraints.md).  Alternative 3 loads the high element from the high
;; half (%H1) of an offsettable memory operand 1.  The three memory
;; destination alternatives output "#".
4916 (define_insn "*avx_loadlpd"
4917 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
4918 (vec_concat:V2DF
4919 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
4920 (vec_select:DF
4921 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
4922 (parallel [(const_int 1)]))))]
4923 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4924 "@
4925 vmovsd\t{%2, %0|%0, %2}
4926 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4927 vmovsd\t{%2, %1, %0|%0, %1, %2}
4928 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4929 #
4930 #
4931 #"
4932 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
4933 (set_attr "prefix" "vex")
4934 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
4935
;; SSE2: result = { operand 2, element 1 of operand 1 }.
;; Alternatives 0-2 tie operand 1 to the destination (or take it through
;; constraint "C") and load/move operand 2 into the low element.
;; Alternatives 3 and 4 instead tie the DF operand 2 to the destination
;; and bring in the high element of operand 1: from a register with
;; shufpd, or from the high half (%H1) of memory with movhpd.  The three
;; memory-destination alternatives output "#".
;;
;; In the shufpd alternative the source must be operand 1: "shufpd $2"
;; keeps the low element of the destination (operand 2) and takes the
;; high element from the source.  Using %2 there would name the
;; destination register itself and leave it unchanged.
4936 (define_insn "sse2_loadlpd"
4937 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
4938 (vec_concat:V2DF
4939 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
4940 (vec_select:DF
4941 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
4942 (parallel [(const_int 1)]))))]
4943 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4944 "@
4945 movsd\t{%2, %0|%0, %2}
4946 movlpd\t{%2, %0|%0, %2}
4947 movsd\t{%2, %0|%0, %2}
4948 shufpd\t{$2, %1, %0|%0, %1, 2}
4949 movhpd\t{%H1, %0|%0, %H1}
4950 #
4951 #
4952 #"
4953 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
4954 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
4955 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
4956 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
4957
;; After reload, replacing the low element of a V2DF in memory with a
;; register (the high element is the memory's own element 1) becomes a
;; DFmode store of that register.  Only element 0 changes, so the store
;; goes to byte offset 0; storing at offset 8 would overwrite the high
;; element that the pattern says must be preserved.
4958 (define_split
4959 [(set (match_operand:V2DF 0 "memory_operand" "")
4960 (vec_concat:V2DF
4961 (match_operand:DF 1 "register_operand" "")
4962 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4963 "TARGET_SSE2 && reload_completed"
4964 [(set (match_dup 0) (match_dup 1))]
4965 "operands[0] = adjust_address (operands[0], DFmode, 0);")
4966
4967 ;; Not sure these two are ever used, but it doesn't hurt to have
4968 ;; them. -aoliva
;; SSE-without-SSE2 extraction of element 1 of a V2DF, done with
;; single-precision moves: movhps store, movhlps register move, or a
;; movlps load from the high half (%H1) of an offsettable memory source.
4969 (define_insn "*vec_extractv2df_1_sse"
4970 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4971 (vec_select:DF
4972 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4973 (parallel [(const_int 1)])))]
4974 "!TARGET_SSE2 && TARGET_SSE
4975 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4976 "@
4977 movhps\t{%1, %0|%0, %1}
4978 movhlps\t{%1, %0|%0, %1}
4979 movlps\t{%H1, %0|%0, %H1}"
4980 [(set_attr "type" "ssemov")
4981 (set_attr "mode" "V2SF,V4SF,V2SF")])
4982
;; SSE-without-SSE2 extraction of element 0 of a V2DF, done with
;; single-precision moves: movlps store, movaps register copy, or
;; movlps load from memory.
4983 (define_insn "*vec_extractv2df_0_sse"
4984 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4985 (vec_select:DF
4986 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4987 (parallel [(const_int 0)])))]
4988 "!TARGET_SSE2 && TARGET_SSE
4989 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4990 "@
4991 movlps\t{%1, %0|%0, %1}
4992 movaps\t{%1, %0|%0, %1}
4993 movlps\t{%1, %0|%0, %1}"
4994 [(set_attr "type" "ssemov")
4995 (set_attr "mode" "V2SF,V4SF,V2SF")])
4996
;; AVX movsd as a vec_merge with mask 1: element 0 of the result comes
;; from operand 2 and element 1 from operand 1.  Alternatives 3 and 4
;; handle operand 1 in memory (load of its high half, %H1) and a memory
;; destination tied to operand 2 (store into its high half, %H0).
4997 (define_insn "*avx_movsd"
4998 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
4999 (vec_merge:V2DF
5000 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
5001 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
5002 (const_int 1)))]
5003 "TARGET_AVX"
5004 "@
5005 vmovsd\t{%2, %1, %0|%0, %1, %2}
5006 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5007 vmovlpd\t{%2, %0|%0, %2}
5008 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
5009 vmovhps\t{%1, %H0|%H0, %1}"
5010 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
5011 (set_attr "prefix" "vex")
5012 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
5013
;; SSE2 movsd as a vec_merge with mask 1: element 0 of the result comes
;; from operand 2 and element 1 from operand 1.
;; Alternatives 0-2 tie operand 1 to the destination and replace its low
;; element with operand 2.  Alternatives 3-5 tie operand 2 to the
;; destination and replace its high element with that of operand 1.
;;
;; In the shufpd alternative the source must be operand 1: "shufpd $2"
;; keeps the low element of the destination (operand 2) and takes the
;; high element from the source.  Using %2 there would name the
;; destination register itself and leave it unchanged.
5014 (define_insn "sse2_movsd"
5015 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
5016 (vec_merge:V2DF
5017 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
5018 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
5019 (const_int 1)))]
5020 "TARGET_SSE2"
5021 "@
5022 movsd\t{%2, %0|%0, %2}
5023 movlpd\t{%2, %0|%0, %2}
5024 movlpd\t{%2, %0|%0, %2}
5025 shufpd\t{$2, %1, %0|%0, %1, 2}
5026 movhps\t{%H1, %0|%0, %H1}
5027 movhps\t{%1, %H0|%H0, %1}"
5028 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
5029 (set_attr "prefix_data16" "*,1,1,*,*,*")
5030 (set_attr "length_immediate" "*,*,*,1,*,*")
5031 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
5032
;; SSE3: duplicate the DF operand 1 (register or memory) into both
;; elements of a V2DF using movddup (vmovddup when the %v prefix
;; applies).
5033 (define_insn "*vec_dupv2df_sse3"
5034 [(set (match_operand:V2DF 0 "register_operand" "=x")
5035 (vec_duplicate:V2DF
5036 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
5037 "TARGET_SSE3"
5038 "%vmovddup\t{%1, %0|%0, %1}"
5039 [(set_attr "type" "sselog1")
5040 (set_attr "prefix" "maybe_vex")
5041 (set_attr "mode" "DF")])
5042
;; SSE2: duplicate the DF operand 1 into both elements of a V2DF.  The
;; source is tied to the destination register and unpcklpd %0, %0
;; copies its low element into the high element.
5043 (define_insn "vec_dupv2df"
5044 [(set (match_operand:V2DF 0 "register_operand" "=x")
5045 (vec_duplicate:V2DF
5046 (match_operand:DF 1 "register_operand" "0")))]
5047 "TARGET_SSE2"
5048 "unpcklpd\t%0, %0"
5049 [(set_attr "type" "sselog1")
5050 (set_attr "mode" "V2DF")])
5051
;; SSE3: a V2DF built by concatenating the DF operand 1 with itself
;; (match_dup 1) is emitted as movddup (vmovddup when the %v prefix
;; applies).
5052 (define_insn "*vec_concatv2df_sse3"
5053 [(set (match_operand:V2DF 0 "register_operand" "=x")
5054 (vec_concat:V2DF
5055 (match_operand:DF 1 "nonimmediate_operand" "xm")
5056 (match_dup 1)))]
5057 "TARGET_SSE3"
5058 "%vmovddup\t{%1, %0|%0, %1}"
5059 [(set_attr "type" "sselog1")
5060 (set_attr "prefix" "maybe_vex")
5061 (set_attr "mode" "DF")])
5062
;; AVX: build a V2DF from DF operands 1 (low) and 2 (high).  Both in
;; registers: vunpcklpd.  Operand 2 in memory: vmovhpd.  Operand 1 in
;; memory with operand 2 matching constraint "C": a plain vmovsd load.
5063 (define_insn "*vec_concatv2df_avx"
5064 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
5065 (vec_concat:V2DF
5066 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
5067 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
5068 "TARGET_AVX"
5069 "@
5070 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5071 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5072 vmovsd\t{%1, %0|%0, %1}"
5073 [(set_attr "type" "ssemov")
5074 (set_attr "prefix" "vex")
5075 (set_attr "mode" "DF,V1DF,DF")])
5076
;; Non-AVX: build a V2DF from DF operands 1 (low) and 2 (high).  The
;; pattern is enabled for TARGET_SSE.  The first three alternatives use
;; the "Y2" register constraint and double-precision instructions
;; (unpcklpd, movhpd, movsd); the last two use the "x" constraint and
;; the single-precision movlhps / movhps.
5077 (define_insn "*vec_concatv2df"
5078 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
5079 (vec_concat:V2DF
5080 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
5081 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
5082 "TARGET_SSE"
5083 "@
5084 unpcklpd\t{%2, %0|%0, %2}
5085 movhpd\t{%2, %0|%0, %2}
5086 movsd\t{%1, %0|%0, %1}
5087 movlhps\t{%2, %0|%0, %2}
5088 movhps\t{%2, %0|%0, %2}"
5089 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
5090 (set_attr "prefix_data16" "*,1,*,*,*")
5091 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
5092
5093 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5094 ;;
5095 ;; Parallel integral arithmetic
5096 ;;
5097 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5098
;; Integer vector negation for the SSEMODEI modes, expanded as
;; (0 - operand 1).  Operand 2 is created by the expander as a register
;; holding CONST0_RTX of the vector mode.
5099 (define_expand "neg<mode>2"
5100 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5101 (minus:SSEMODEI
5102 (match_dup 2)
5103 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
5104 "TARGET_SSE2"
5105 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
5106
;; Expander for the plusminus codes on the SSEMODEI integer vector
;; modes.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
5107 (define_expand "<plusminus_insn><mode>3"
5108 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5109 (plusminus:SSEMODEI
5110 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5111 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5112 "TARGET_SSE2"
5113 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
5114
;; AVX three-operand form of the plusminus codes on SSEMODEI modes.
;; The mnemonic is "vp" + <plusminus_mnemonic> + <ssevecsize>.  The
;; <comm> attribute supplies the constraint modifier in front of "x" on
;; operand 1.
5115 (define_insn "*avx_<plusminus_insn><mode>3"
5116 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5117 (plusminus:SSEMODEI
5118 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
5119 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5120 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5121 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5122 [(set_attr "type" "sseiadd")
5123 (set_attr "prefix" "vex")
5124 (set_attr "mode" "TI")])
5125
;; SSE2 two-operand form of the plusminus codes on SSEMODEI modes;
;; operand 1 is tied to the destination.  The mnemonic is
;; "p" + <plusminus_mnemonic> + <ssevecsize>.
5126 (define_insn "*<plusminus_insn><mode>3"
5127 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5128 (plusminus:SSEMODEI
5129 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
5130 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5131 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5132 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5133 [(set_attr "type" "sseiadd")
5134 (set_attr "prefix_data16" "1")
5135 (set_attr "mode" "TI")])
5136
;; Expander for the sat_plusminus codes on the SSEMODE12 modes.
;; Operands are adjusted in place by ix86_fixup_binary_operands_no_copy
;; before the pattern is emitted.
5137 (define_expand "sse2_<plusminus_insn><mode>3"
5138 [(set (match_operand:SSEMODE12 0 "register_operand" "")
5139 (sat_plusminus:SSEMODE12
5140 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
5141 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
5142 "TARGET_SSE2"
5143 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
5144
;; AVX three-operand form of the sat_plusminus codes on SSEMODE12
;; modes.  The mnemonic is "vp" + <plusminus_mnemonic> + <ssevecsize>.
5145 (define_insn "*avx_<plusminus_insn><mode>3"
5146 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5147 (sat_plusminus:SSEMODE12
5148 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
5149 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5150 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5151 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5152 [(set_attr "type" "sseiadd")
5153 (set_attr "prefix" "vex")
5154 (set_attr "mode" "TI")])
5155
;; SSE2 two-operand form of the sat_plusminus codes on SSEMODE12 modes;
;; operand 1 is tied to the destination.  The mnemonic is
;; "p" + <plusminus_mnemonic> + <ssevecsize>.
5156 (define_insn "*sse2_<plusminus_insn><mode>3"
5157 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5158 (sat_plusminus:SSEMODE12
5159 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
5160 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5161 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5162 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5163 [(set_attr "type" "sseiadd")
5164 (set_attr "prefix_data16" "1")
5165 (set_attr "mode" "TI")])
5166
;; V16QI multiply.  The insn only matches while new pseudos can still be
;; created, outputs "#", and is split unconditionally ("&& 1") into:
;; four byte interleaves of each input with itself, two V8HI multiplies
;; on the interleaved values, and a final even-element extraction via
;; ix86_expand_vec_extract_even_odd that gathers the result bytes.
5167 (define_insn_and_split "mulv16qi3"
5168 [(set (match_operand:V16QI 0 "register_operand" "")
5169 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
5170 (match_operand:V16QI 2 "register_operand" "")))]
5171 "TARGET_SSE2
5172 && can_create_pseudo_p ()"
5173 "#"
5174 "&& 1"
5175 [(const_int 0)]
5176 {
5177 rtx t[6];
5178 int i;
5179
5180 for (i = 0; i < 6; ++i)
5181 t[i] = gen_reg_rtx (V16QImode);
5182
5183 /* Unpack data such that we've got a source byte in each low byte of
5184 each word. We don't care what goes into the high byte of each word.
5185 Rather than trying to get zero in there, most convenient is to let
5186 it be a copy of the low byte. */
5187 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
5188 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
5189 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
5190 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
5191
5192 /* Multiply words. The end-of-line annotations here give a picture of what
5193 the output of that instruction looks like. Dot means don't care; the
5194 letters are the bytes of the result with A being the most significant. */
5195 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
5196 gen_lowpart (V8HImode, t[0]),
5197 gen_lowpart (V8HImode, t[1])));
5198 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
5199 gen_lowpart (V8HImode, t[2]),
5200 gen_lowpart (V8HImode, t[3])));
5201
5202 /* Extract the even bytes and merge them back together. */
5203 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
5204 DONE;
5205 })
5206
;; Expander for the V8HI multiply.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
5207 (define_expand "mulv8hi3"
5208 [(set (match_operand:V8HI 0 "register_operand" "")
5209 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
5210 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5211 "TARGET_SSE2"
5212 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
5213
;; AVX three-operand V8HI multiply (vpmullw).  The "%" modifier marks
;; operands 1 and 2 as commutative.
5214 (define_insn "*avx_mulv8hi3"
5215 [(set (match_operand:V8HI 0 "register_operand" "=x")
5216 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5217 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5218 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5219 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
5220 [(set_attr "type" "sseimul")
5221 (set_attr "prefix" "vex")
5222 (set_attr "mode" "TI")])
5223
;; SSE2 two-operand V8HI multiply (pmullw).  Operand 1 is tied to the
;; destination and marked commutative with operand 2 ("%0").
5224 (define_insn "*mulv8hi3"
5225 [(set (match_operand:V8HI 0 "register_operand" "=x")
5226 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5227 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5228 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5229 "pmullw\t{%2, %0|%0, %2}"
5230 [(set_attr "type" "sseimul")
5231 (set_attr "prefix_data16" "1")
5232 (set_attr "mode" "TI")])
5233
;; Expander for the V8HI high-part multiply, for each any_extend code:
;; both inputs are extended to V8SI, multiplied, shifted right by 16 and
;; truncated back to V8HI.  Operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy.
5234 (define_expand "<s>mulv8hi3_highpart"
5235 [(set (match_operand:V8HI 0 "register_operand" "")
5236 (truncate:V8HI
5237 (lshiftrt:V8SI
5238 (mult:V8SI
5239 (any_extend:V8SI
5240 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5241 (any_extend:V8SI
5242 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5243 (const_int 16))))]
5244 "TARGET_SSE2"
5245 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
5246
;; AVX three-operand V8HI high-part multiply: bits 16..31 of each
;; extended 32-bit product.  The mnemonic is vpmulh<u>w, with <u> chosen
;; by the any_extend code.
5247 (define_insn "*avx_<s>mulv8hi3_highpart"
5248 [(set (match_operand:V8HI 0 "register_operand" "=x")
5249 (truncate:V8HI
5250 (lshiftrt:V8SI
5251 (mult:V8SI
5252 (any_extend:V8SI
5253 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5254 (any_extend:V8SI
5255 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5256 (const_int 16))))]
5257 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5258 "vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5259 [(set_attr "type" "sseimul")
5260 (set_attr "prefix" "vex")
5261 (set_attr "mode" "TI")])
5262
;; SSE2 two-operand V8HI high-part multiply: bits 16..31 of each
;; extended 32-bit product.  Operand 1 is tied to the destination; the
;; mnemonic is pmulh<u>w, with <u> chosen by the any_extend code.
5263 (define_insn "*<s>mulv8hi3_highpart"
5264 [(set (match_operand:V8HI 0 "register_operand" "=x")
5265 (truncate:V8HI
5266 (lshiftrt:V8SI
5267 (mult:V8SI
5268 (any_extend:V8SI
5269 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5270 (any_extend:V8SI
5271 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5272 (const_int 16))))]
5273 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5274 "pmulh<u>w\t{%2, %0|%0, %2}"
5275 [(set_attr "type" "sseimul")
5276 (set_attr "prefix_data16" "1")
5277 (set_attr "mode" "TI")])
5278
;; Expander for the unsigned widening multiply: elements 0 and 2 of each
;; V4SI input are zero-extended to V2DI and multiplied.  Operands are
;; adjusted in place by ix86_fixup_binary_operands_no_copy.
5279 (define_expand "sse2_umulv2siv2di3"
5280 [(set (match_operand:V2DI 0 "register_operand" "")
5281 (mult:V2DI
5282 (zero_extend:V2DI
5283 (vec_select:V2SI
5284 (match_operand:V4SI 1 "nonimmediate_operand" "")
5285 (parallel [(const_int 0) (const_int 2)])))
5286 (zero_extend:V2DI
5287 (vec_select:V2SI
5288 (match_operand:V4SI 2 "nonimmediate_operand" "")
5289 (parallel [(const_int 0) (const_int 2)])))))]
5290 "TARGET_SSE2"
5291 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
5292
;; AVX three-operand unsigned widening multiply of elements 0 and 2 of
;; two V4SI operands into a V2DI result (vpmuludq).
5293 (define_insn "*avx_umulv2siv2di3"
5294 [(set (match_operand:V2DI 0 "register_operand" "=x")
5295 (mult:V2DI
5296 (zero_extend:V2DI
5297 (vec_select:V2SI
5298 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5299 (parallel [(const_int 0) (const_int 2)])))
5300 (zero_extend:V2DI
5301 (vec_select:V2SI
5302 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5303 (parallel [(const_int 0) (const_int 2)])))))]
5304 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5305 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5306 [(set_attr "type" "sseimul")
5307 (set_attr "prefix" "vex")
5308 (set_attr "mode" "TI")])
5309
;; SSE2 two-operand unsigned widening multiply of elements 0 and 2 of
;; two V4SI operands into a V2DI result (pmuludq); operand 1 is tied to
;; the destination.
5310 (define_insn "*sse2_umulv2siv2di3"
5311 [(set (match_operand:V2DI 0 "register_operand" "=x")
5312 (mult:V2DI
5313 (zero_extend:V2DI
5314 (vec_select:V2SI
5315 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5316 (parallel [(const_int 0) (const_int 2)])))
5317 (zero_extend:V2DI
5318 (vec_select:V2SI
5319 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5320 (parallel [(const_int 0) (const_int 2)])))))]
5321 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5322 "pmuludq\t{%2, %0|%0, %2}"
5323 [(set_attr "type" "sseimul")
5324 (set_attr "prefix_data16" "1")
5325 (set_attr "mode" "TI")])
5326
;; Expander for the signed widening multiply (SSE4.1): elements 0 and 2
;; of each V4SI input are sign-extended to V2DI and multiplied.
;; Operands are adjusted in place by ix86_fixup_binary_operands_no_copy.
5327 (define_expand "sse4_1_mulv2siv2di3"
5328 [(set (match_operand:V2DI 0 "register_operand" "")
5329 (mult:V2DI
5330 (sign_extend:V2DI
5331 (vec_select:V2SI
5332 (match_operand:V4SI 1 "nonimmediate_operand" "")
5333 (parallel [(const_int 0) (const_int 2)])))
5334 (sign_extend:V2DI
5335 (vec_select:V2SI
5336 (match_operand:V4SI 2 "nonimmediate_operand" "")
5337 (parallel [(const_int 0) (const_int 2)])))))]
5338 "TARGET_SSE4_1"
5339 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
5340
;; AVX three-operand signed widening multiply of elements 0 and 2 of
;; two V4SI operands into a V2DI result (vpmuldq).
5341 (define_insn "*avx_mulv2siv2di3"
5342 [(set (match_operand:V2DI 0 "register_operand" "=x")
5343 (mult:V2DI
5344 (sign_extend:V2DI
5345 (vec_select:V2SI
5346 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5347 (parallel [(const_int 0) (const_int 2)])))
5348 (sign_extend:V2DI
5349 (vec_select:V2SI
5350 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5351 (parallel [(const_int 0) (const_int 2)])))))]
5352 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5353 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5354 [(set_attr "type" "sseimul")
5355 (set_attr "prefix_extra" "1")
5356 (set_attr "prefix" "vex")
5357 (set_attr "mode" "TI")])
5358
;; SSE4.1 signed widening multiply of V4SI elements 0 and 2 (pmuldq).
;; Two-operand form: operand 1 is tied to the destination ("0") and is
;; commutative with operand 2 ("%").
5359 (define_insn "*sse4_1_mulv2siv2di3"
5360 [(set (match_operand:V2DI 0 "register_operand" "=x")
5361 (mult:V2DI
5362 (sign_extend:V2DI
5363 (vec_select:V2SI
5364 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5365 (parallel [(const_int 0) (const_int 2)])))
5366 (sign_extend:V2DI
5367 (vec_select:V2SI
5368 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5369 (parallel [(const_int 0) (const_int 2)])))))]
5370 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5371 "pmuldq\t{%2, %0|%0, %2}"
5372 [(set_attr "type" "sseimul")
5373 (set_attr "prefix_extra" "1")
5374 (set_attr "mode" "TI")])
5375
;; Expander for the SSE2 multiply-and-add of packed words.  The RTL sums
;; two V4SI products: the sign-extended even elements (0,2,4,6) of both
;; V8HI inputs multiplied together, plus the sign-extended odd elements
;; (1,3,5,7) multiplied together.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
5376 (define_expand "sse2_pmaddwd"
5377 [(set (match_operand:V4SI 0 "register_operand" "")
5378 (plus:V4SI
5379 (mult:V4SI
5380 (sign_extend:V4SI
5381 (vec_select:V4HI
5382 (match_operand:V8HI 1 "nonimmediate_operand" "")
5383 (parallel [(const_int 0)
5384 (const_int 2)
5385 (const_int 4)
5386 (const_int 6)])))
5387 (sign_extend:V4SI
5388 (vec_select:V4HI
5389 (match_operand:V8HI 2 "nonimmediate_operand" "")
5390 (parallel [(const_int 0)
5391 (const_int 2)
5392 (const_int 4)
5393 (const_int 6)]))))
5394 (mult:V4SI
5395 (sign_extend:V4SI
5396 (vec_select:V4HI (match_dup 1)
5397 (parallel [(const_int 1)
5398 (const_int 3)
5399 (const_int 5)
5400 (const_int 7)])))
5401 (sign_extend:V4SI
5402 (vec_select:V4HI (match_dup 2)
5403 (parallel [(const_int 1)
5404 (const_int 3)
5405 (const_int 5)
5406 (const_int 7)]))))))]
5407 "TARGET_SSE2"
5408 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
5409
;; AVX multiply-and-add of packed words (vpmaddwd).  Same RTL shape as the
;; sse2_pmaddwd expander: product of the even V8HI elements plus product
;; of the odd V8HI elements, each sign-extended to SImode.  Three-operand
;; form; operands 1 and 2 are commutative ("%").
5410 (define_insn "*avx_pmaddwd"
5411 [(set (match_operand:V4SI 0 "register_operand" "=x")
5412 (plus:V4SI
5413 (mult:V4SI
5414 (sign_extend:V4SI
5415 (vec_select:V4HI
5416 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5417 (parallel [(const_int 0)
5418 (const_int 2)
5419 (const_int 4)
5420 (const_int 6)])))
5421 (sign_extend:V4SI
5422 (vec_select:V4HI
5423 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5424 (parallel [(const_int 0)
5425 (const_int 2)
5426 (const_int 4)
5427 (const_int 6)]))))
5428 (mult:V4SI
5429 (sign_extend:V4SI
5430 (vec_select:V4HI (match_dup 1)
5431 (parallel [(const_int 1)
5432 (const_int 3)
5433 (const_int 5)
5434 (const_int 7)])))
5435 (sign_extend:V4SI
5436 (vec_select:V4HI (match_dup 2)
5437 (parallel [(const_int 1)
5438 (const_int 3)
5439 (const_int 5)
5440 (const_int 7)]))))))]
5441 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5442 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5443 [(set_attr "type" "sseiadd")
5444 (set_attr "prefix" "vex")
5445 (set_attr "mode" "TI")])
5446
;; SSE2 multiply-and-add of packed words (pmaddwd).  Product of the even
;; V8HI elements plus product of the odd V8HI elements, each sign-extended
;; to SImode.  Two-operand form: operand 1 is tied to the destination
;; ("0") and commutative with operand 2 ("%").
5447 (define_insn "*sse2_pmaddwd"
5448 [(set (match_operand:V4SI 0 "register_operand" "=x")
5449 (plus:V4SI
5450 (mult:V4SI
5451 (sign_extend:V4SI
5452 (vec_select:V4HI
5453 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5454 (parallel [(const_int 0)
5455 (const_int 2)
5456 (const_int 4)
5457 (const_int 6)])))
5458 (sign_extend:V4SI
5459 (vec_select:V4HI
5460 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5461 (parallel [(const_int 0)
5462 (const_int 2)
5463 (const_int 4)
5464 (const_int 6)]))))
5465 (mult:V4SI
5466 (sign_extend:V4SI
5467 (vec_select:V4HI (match_dup 1)
5468 (parallel [(const_int 1)
5469 (const_int 3)
5470 (const_int 5)
5471 (const_int 7)])))
5472 (sign_extend:V4SI
5473 (vec_select:V4HI (match_dup 2)
5474 (parallel [(const_int 1)
5475 (const_int 3)
5476 (const_int 5)
5477 (const_int 7)]))))))]
5478 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5479 "pmaddwd\t{%2, %0|%0, %2}"
5480 [(set_attr "type" "sseiadd")
5481 (set_attr "atom_unit" "simul")
5482 (set_attr "prefix_data16" "1")
5483 (set_attr "mode" "TI")])
5484
;; Named expander for V4SI multiplication.  With SSE4.1 or AVX the operands
;; are run through ix86_fixup_binary_operands_no_copy; in every case the
;; (mult:V4SI ...) pattern is then emitted (there is no DONE) and is
;; matched by one of the *_mulv4si3 patterns that follow.
5485 (define_expand "mulv4si3"
5486 [(set (match_operand:V4SI 0 "register_operand" "")
5487 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5488 (match_operand:V4SI 2 "register_operand" "")))]
5489 "TARGET_SSE2"
5490 {
5491 if (TARGET_SSE4_1 || TARGET_AVX)
5492 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
5493 })
5494
;; AVX element-wise V4SI multiply (vpmulld), three-operand form with
;; commutative inputs ("%").
5495 (define_insn "*avx_mulv4si3"
5496 [(set (match_operand:V4SI 0 "register_operand" "=x")
5497 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5498 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5499 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5500 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5501 [(set_attr "type" "sseimul")
5502 (set_attr "prefix_extra" "1")
5503 (set_attr "prefix" "vex")
5504 (set_attr "mode" "TI")])
5505
;; SSE4.1 element-wise V4SI multiply (pmulld).  Operand 1 is tied to the
;; destination ("0") and commutative with operand 2 ("%").
5506 (define_insn "*sse4_1_mulv4si3"
5507 [(set (match_operand:V4SI 0 "register_operand" "=x")
5508 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5509 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5510 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5511 "pmulld\t{%2, %0|%0, %2}"
5512 [(set_attr "type" "sseimul")
5513 (set_attr "prefix_extra" "1")
5514 (set_attr "mode" "TI")])
5515
;; V4SI multiply for plain SSE2 (neither SSE4.1 nor AVX), only valid while
;; new pseudos can still be created.  The insn itself outputs "#" and is
;; always split ("&& 1") into: a widening multiply of elements 0 and 2,
;; a 32-bit whole-register right shift of both inputs followed by a second
;; widening multiply (elements 1 and 3), two pshufd shuffles that gather
;; the low halves of the products, and a final low interleave into
;; operand 0.
5516 (define_insn_and_split "*sse2_mulv4si3"
5517 [(set (match_operand:V4SI 0 "register_operand" "")
5518 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5519 (match_operand:V4SI 2 "register_operand" "")))]
5520 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5521 && can_create_pseudo_p ()"
5522 "#"
5523 "&& 1"
5524 [(const_int 0)]
5525 {
5526 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5527 rtx op0, op1, op2;
5528
5529 op0 = operands[0];
5530 op1 = operands[1];
5531 op2 = operands[2];
5532 t1 = gen_reg_rtx (V4SImode);
5533 t2 = gen_reg_rtx (V4SImode);
5534 t3 = gen_reg_rtx (V4SImode);
5535 t4 = gen_reg_rtx (V4SImode);
5536 t5 = gen_reg_rtx (V4SImode);
5537 t6 = gen_reg_rtx (V4SImode);
5538 thirtytwo = GEN_INT (32);
5539
5540 /* Multiply elements 2 and 0. */
5541 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5542 op1, op2));
5543
5544 /* Shift both input vectors down one element, so that elements 3
5545 and 1 are now in the slots for elements 2 and 0. For K8, at
5546 least, this is faster than using a shuffle. */
5547 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5548 gen_lowpart (V1TImode, op1),
5549 thirtytwo));
5550 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5551 gen_lowpart (V1TImode, op2),
5552 thirtytwo));
5553 /* Multiply elements 3 and 1. */
5554 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5555 t2, t3));
5556
5557 /* Move the results in element 2 down to element 1; we don't care
5558 what goes in elements 2 and 3. */
5559 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5560 const0_rtx, const0_rtx));
5561 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5562 const0_rtx, const0_rtx));
5563
5564 /* Merge the parts back together. */
5565 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
5566 DONE;
5567 })
5568
;; V2DI multiply, synthesized from 32-bit multiplies.  Valid with SSE2
;; while new pseudos can still be created; the insn outputs "#" and is
;; always split ("&& 1").  With XOP the split uses pshufd, a V4SI
;; multiply, a horizontal add (phadddq), a 32-bit left shift and a
;; multiply-accumulate (pmacsdql).  Otherwise it forms low*low plus the
;; two cross products (high*low), each shifted left by 32, using three
;; unsigned widening multiplies.
5569 (define_insn_and_split "mulv2di3"
5570 [(set (match_operand:V2DI 0 "register_operand" "")
5571 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5572 (match_operand:V2DI 2 "register_operand" "")))]
5573 "TARGET_SSE2
5574 && can_create_pseudo_p ()"
5575 "#"
5576 "&& 1"
5577 [(const_int 0)]
5578 {
5579 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5580 rtx op0, op1, op2;
5581
5582 op0 = operands[0];
5583 op1 = operands[1];
5584 op2 = operands[2];
5585
5586 if (TARGET_XOP)
5587 {
5588 /* op1: A,B,C,D, op2: E,F,G,H */
5589 op1 = gen_lowpart (V4SImode, op1);
5590 op2 = gen_lowpart (V4SImode, op2);
5591
5592 t1 = gen_reg_rtx (V4SImode);
5593 t2 = gen_reg_rtx (V4SImode);
5594 t3 = gen_reg_rtx (V2DImode);
5595 t4 = gen_reg_rtx (V2DImode);
5596
5597 /* t1: B,A,D,C */
5598 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5599 GEN_INT (1),
5600 GEN_INT (0),
5601 GEN_INT (3),
5602 GEN_INT (2)));
5603
5604 /* t2: (B*E),(A*F),(D*G),(C*H) */
5605 emit_insn (gen_mulv4si3 (t2, t1, op2));
5606
5607 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5608 emit_insn (gen_xop_phadddq (t3, t2));
5609
5610 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5611 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5612
5613 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5614 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5615 }
5616 else
5617 {
5618 t1 = gen_reg_rtx (V2DImode);
5619 t2 = gen_reg_rtx (V2DImode);
5620 t3 = gen_reg_rtx (V2DImode);
5621 t4 = gen_reg_rtx (V2DImode);
5622 t5 = gen_reg_rtx (V2DImode);
5623 t6 = gen_reg_rtx (V2DImode);
5624 thirtytwo = GEN_INT (32);
5625
5626 /* Multiply low parts. */
5627 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5628 gen_lowpart (V4SImode, op2)));
5629
5630 /* Shift input vectors right 32 bits so we can multiply high parts. */
5631 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5632 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5633
5634 /* Multiply high parts by low parts. */
5635 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5636 gen_lowpart (V4SImode, t3)));
5637 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5638 gen_lowpart (V4SImode, t2)));
5639
5640 /* Shift them back. */
5641 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5642 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5643
5644 /* Add the three parts together. */
5645 emit_insn (gen_addv2di3 (t6, t1, t4));
5646 emit_insn (gen_addv2di3 (op0, t6, t5));
5647 }
5648 DONE;
5649 })
5650
;; Signed widening multiply, high half: V8HI x V8HI -> V4SI.  Computes the
;; low 16 bits (mulv8hi3) and the signed high 16 bits (smulv8hi3_highpart)
;; of every product, then interleaves the high halves of those two vectors
;; into operand 0 viewed as V8HI.
5651 (define_expand "vec_widen_smult_hi_v8hi"
5652 [(match_operand:V4SI 0 "register_operand" "")
5653 (match_operand:V8HI 1 "register_operand" "")
5654 (match_operand:V8HI 2 "register_operand" "")]
5655 "TARGET_SSE2"
5656 {
5657 rtx op1, op2, t1, t2, dest;
5658
5659 op1 = operands[1];
5660 op2 = operands[2];
5661 t1 = gen_reg_rtx (V8HImode);
5662 t2 = gen_reg_rtx (V8HImode);
5663 dest = gen_lowpart (V8HImode, operands[0]);
5664
5665 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5666 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5667 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
5668 DONE;
5669 })
5670
;; Signed widening multiply, low half: V8HI x V8HI -> V4SI.  Computes the
;; low 16 bits (mulv8hi3) and the signed high 16 bits (smulv8hi3_highpart)
;; of every product, then interleaves the low halves of those two vectors
;; into operand 0 viewed as V8HI.
5671 (define_expand "vec_widen_smult_lo_v8hi"
5672 [(match_operand:V4SI 0 "register_operand" "")
5673 (match_operand:V8HI 1 "register_operand" "")
5674 (match_operand:V8HI 2 "register_operand" "")]
5675 "TARGET_SSE2"
5676 {
5677 rtx op1, op2, t1, t2, dest;
5678
5679 op1 = operands[1];
5680 op2 = operands[2];
5681 t1 = gen_reg_rtx (V8HImode);
5682 t2 = gen_reg_rtx (V8HImode);
5683 dest = gen_lowpart (V8HImode, operands[0]);
5684
5685 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5686 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
5687 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
5688 DONE;
5689 })
5690
;; Unsigned widening multiply, high half: V8HI x V8HI -> V4SI.  Computes
;; the low 16 bits (mulv8hi3) and the unsigned high 16 bits
;; (umulv8hi3_highpart) of every product, then interleaves the high halves
;; of those two vectors into operand 0 viewed as V8HI.
5691 (define_expand "vec_widen_umult_hi_v8hi"
5692 [(match_operand:V4SI 0 "register_operand" "")
5693 (match_operand:V8HI 1 "register_operand" "")
5694 (match_operand:V8HI 2 "register_operand" "")]
5695 "TARGET_SSE2"
5696 {
5697 rtx op1, op2, t1, t2, dest;
5698
5699 op1 = operands[1];
5700 op2 = operands[2];
5701 t1 = gen_reg_rtx (V8HImode);
5702 t2 = gen_reg_rtx (V8HImode);
5703 dest = gen_lowpart (V8HImode, operands[0]);
5704
5705 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5706 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5707 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
5708 DONE;
5709 })
5710
;; Unsigned widening multiply, low half: V8HI x V8HI -> V4SI.  Computes
;; the low 16 bits (mulv8hi3) and the unsigned high 16 bits
;; (umulv8hi3_highpart) of every product, then interleaves the low halves
;; of those two vectors into operand 0 viewed as V8HI.
5711 (define_expand "vec_widen_umult_lo_v8hi"
5712 [(match_operand:V4SI 0 "register_operand" "")
5713 (match_operand:V8HI 1 "register_operand" "")
5714 (match_operand:V8HI 2 "register_operand" "")]
5715 "TARGET_SSE2"
5716 {
5717 rtx op1, op2, t1, t2, dest;
5718
5719 op1 = operands[1];
5720 op2 = operands[2];
5721 t1 = gen_reg_rtx (V8HImode);
5722 t2 = gen_reg_rtx (V8HImode);
5723 dest = gen_lowpart (V8HImode, operands[0]);
5724
5725 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5726 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5727 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
5728 DONE;
5729 })
5730
;; Signed widening multiply, high half: V4SI x V4SI -> V2DI (XOP only).
;; Both inputs are permuted with pshufd using selectors (0, 2, 1, 3) and
;; the results are passed to xop_mulv2div2di3_high, which is defined
;; elsewhere.
5731 (define_expand "vec_widen_smult_hi_v4si"
5732 [(match_operand:V2DI 0 "register_operand" "")
5733 (match_operand:V4SI 1 "register_operand" "")
5734 (match_operand:V4SI 2 "register_operand" "")]
5735 "TARGET_XOP"
5736 {
5737 rtx t1, t2;
5738
5739 t1 = gen_reg_rtx (V4SImode);
5740 t2 = gen_reg_rtx (V4SImode);
5741
5742 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5743 GEN_INT (0),
5744 GEN_INT (2),
5745 GEN_INT (1),
5746 GEN_INT (3)));
5747 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5748 GEN_INT (0),
5749 GEN_INT (2),
5750 GEN_INT (1),
5751 GEN_INT (3)));
5752 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
5753 DONE;
5754 })
5755
;; Signed widening multiply, low half: V4SI x V4SI -> V2DI (XOP only).
;; Both inputs are permuted with pshufd using selectors (0, 2, 1, 3) and
;; the results are passed to xop_mulv2div2di3_low, which is defined
;; elsewhere.
5756 (define_expand "vec_widen_smult_lo_v4si"
5757 [(match_operand:V2DI 0 "register_operand" "")
5758 (match_operand:V4SI 1 "register_operand" "")
5759 (match_operand:V4SI 2 "register_operand" "")]
5760 "TARGET_XOP"
5761 {
5762 rtx t1, t2;
5763
5764 t1 = gen_reg_rtx (V4SImode);
5765 t2 = gen_reg_rtx (V4SImode);
5766
5767 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5768 GEN_INT (0),
5769 GEN_INT (2),
5770 GEN_INT (1),
5771 GEN_INT (3)));
5772 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5773 GEN_INT (0),
5774 GEN_INT (2),
5775 GEN_INT (1),
5776 GEN_INT (3)));
5777 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
5778 DONE;
5779 })
5780
;; Unsigned widening multiply, high half: V4SI x V4SI -> V2DI.  Each input
;; is high-interleaved with itself, and the two results are fed to
;; sse2_umulv2siv2di3, which multiplies elements 0 and 2.
5781 (define_expand "vec_widen_umult_hi_v4si"
5782 [(match_operand:V2DI 0 "register_operand" "")
5783 (match_operand:V4SI 1 "register_operand" "")
5784 (match_operand:V4SI 2 "register_operand" "")]
5785 "TARGET_SSE2"
5786 {
5787 rtx op1, op2, t1, t2;
5788
5789 op1 = operands[1];
5790 op2 = operands[2];
5791 t1 = gen_reg_rtx (V4SImode);
5792 t2 = gen_reg_rtx (V4SImode);
5793
5794 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5795 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5796 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
5797 DONE;
5798 })
5799
;; Unsigned widening multiply, low half: V4SI x V4SI -> V2DI.  Each input
;; is low-interleaved with itself, and the two results are fed to
;; sse2_umulv2siv2di3, which multiplies elements 0 and 2.
5800 (define_expand "vec_widen_umult_lo_v4si"
5801 [(match_operand:V2DI 0 "register_operand" "")
5802 (match_operand:V4SI 1 "register_operand" "")
5803 (match_operand:V4SI 2 "register_operand" "")]
5804 "TARGET_SSE2"
5805 {
5806 rtx op1, op2, t1, t2;
5807
5808 op1 = operands[1];
5809 op2 = operands[2];
5810 t1 = gen_reg_rtx (V4SImode);
5811 t2 = gen_reg_rtx (V4SImode);
5812
5813 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5814 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5815 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
5816 DONE;
5817 })
5818
;; Signed dot product: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2).  The pmaddwd result goes to a fresh V4SI temporary which is
;; then added to the V4SI accumulator in operand 3.
5819 (define_expand "sdot_prodv8hi"
5820 [(match_operand:V4SI 0 "register_operand" "")
5821 (match_operand:V8HI 1 "register_operand" "")
5822 (match_operand:V8HI 2 "register_operand" "")
5823 (match_operand:V4SI 3 "register_operand" "")]
5824 "TARGET_SSE2"
5825 {
5826 rtx t = gen_reg_rtx (V4SImode);
5827 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5828 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
5829 DONE;
5830 })
5831
;; Unsigned dot product: V4SI x V4SI accumulated into V2DI.  First the
;; widening product of elements 0 and 2 is added to the accumulator
;; (operand 3).  Both inputs are then shifted right by 32 bits as whole
;; 128-bit values, so the second widening multiply sees elements 1 and 3;
;; that product is added to form operand 0.
5832 (define_expand "udot_prodv4si"
5833 [(match_operand:V2DI 0 "register_operand" "")
5834 (match_operand:V4SI 1 "register_operand" "")
5835 (match_operand:V4SI 2 "register_operand" "")
5836 (match_operand:V2DI 3 "register_operand" "")]
5837 "TARGET_SSE2"
5838 {
5839 rtx t1, t2, t3, t4;
5840
5841 t1 = gen_reg_rtx (V2DImode);
5842 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5843 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5844
5845 t2 = gen_reg_rtx (V4SImode);
5846 t3 = gen_reg_rtx (V4SImode);
5847 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5848 gen_lowpart (V1TImode, operands[1]),
5849 GEN_INT (32)));
5850 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5851 gen_lowpart (V1TImode, operands[2]),
5852 GEN_INT (32)));
5853
5854 t4 = gen_reg_rtx (V2DImode);
5855 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5856
5857 emit_insn (gen_addv2di3 (operands[0], t1, t4));
5858 DONE;
5859 })
5860
;; AVX arithmetic right shift of each element (vpsra<ssevecsize>) for the
;; SSEMODE24 modes.  The SImode count in operand 2 is either an xmm
;; register or a constant satisfying "N"; length_immediate is 1 only when
;; the count is a const_int.
5861 (define_insn "*avx_ashr<mode>3"
5862 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5863 (ashiftrt:SSEMODE24
5864 (match_operand:SSEMODE24 1 "register_operand" "x")
5865 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5866 "TARGET_AVX"
5867 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5868 [(set_attr "type" "sseishft")
5869 (set_attr "prefix" "vex")
5870 (set (attr "length_immediate")
5871 (if_then_else (match_operand 2 "const_int_operand" "")
5872 (const_string "1")
5873 (const_string "0")))
5874 (set_attr "mode" "TI")])
5875
;; SSE2 arithmetic right shift of each element (psra<ssevecsize>) for the
;; SSEMODE24 modes.  Operand 1 is tied to the destination ("0"); the count
;; is an xmm register or an "N" constant, and length_immediate is 1 only
;; for a const_int count.
5876 (define_insn "ashr<mode>3"
5877 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5878 (ashiftrt:SSEMODE24
5879 (match_operand:SSEMODE24 1 "register_operand" "0")
5880 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5881 "TARGET_SSE2"
5882 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5883 [(set_attr "type" "sseishft")
5884 (set_attr "prefix_data16" "1")
5885 (set (attr "length_immediate")
5886 (if_then_else (match_operand 2 "const_int_operand" "")
5887 (const_string "1")
5888 (const_string "0")))
5889 (set_attr "mode" "TI")])
5890
;; AVX logical right shift of the whole 128-bit value (vpsrldq).  The RTL
;; shift count in operand 2 is divided by 8 before the immediate is
;; printed (presumably converting a bit count into the byte count the
;; instruction takes).
5891 (define_insn "*avx_lshrv1ti3"
5892 [(set (match_operand:V1TI 0 "register_operand" "=x")
5893 (lshiftrt:V1TI
5894 (match_operand:V1TI 1 "register_operand" "x")
5895 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5896 "TARGET_AVX"
5897 {
5898 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5899 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5900 }
5901 [(set_attr "type" "sseishft")
5902 (set_attr "prefix" "vex")
5903 (set_attr "length_immediate" "1")
5904 (set_attr "mode" "TI")])
5905
;; AVX logical right shift of each element (vpsrl<ssevecsize>) for the
;; SSEMODE248 modes.  The count is an xmm register or an "N" constant;
;; length_immediate is 1 only for a const_int count.
5906 (define_insn "*avx_lshr<mode>3"
5907 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5908 (lshiftrt:SSEMODE248
5909 (match_operand:SSEMODE248 1 "register_operand" "x")
5910 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5911 "TARGET_AVX"
5912 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5913 [(set_attr "type" "sseishft")
5914 (set_attr "prefix" "vex")
5915 (set (attr "length_immediate")
5916 (if_then_else (match_operand 2 "const_int_operand" "")
5917 (const_string "1")
5918 (const_string "0")))
5919 (set_attr "mode" "TI")])
5920
;; SSE2 logical right shift of the whole 128-bit value (psrldq), with
;; operand 1 tied to the destination.  The RTL shift count in operand 2 is
;; divided by 8 before the immediate is printed (presumably converting a
;; bit count into the byte count the instruction takes).
5921 (define_insn "sse2_lshrv1ti3"
5922 [(set (match_operand:V1TI 0 "register_operand" "=x")
5923 (lshiftrt:V1TI
5924 (match_operand:V1TI 1 "register_operand" "0")
5925 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5926 "TARGET_SSE2"
5927 {
5928 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5929 return "psrldq\t{%2, %0|%0, %2}";
5930 }
5931 [(set_attr "type" "sseishft")
5932 (set_attr "prefix_data16" "1")
5933 (set_attr "length_immediate" "1")
5934 (set_attr "atom_unit" "sishuf")
5935 (set_attr "mode" "TI")])
5936
;; SSE2 logical right shift of each element (psrl<ssevecsize>) for the
;; SSEMODE248 modes.  Operand 1 is tied to the destination ("0"); the
;; count is an xmm register or an "N" constant, and length_immediate is 1
;; only for a const_int count.
5937 (define_insn "lshr<mode>3"
5938 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5939 (lshiftrt:SSEMODE248
5940 (match_operand:SSEMODE248 1 "register_operand" "0")
5941 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5942 "TARGET_SSE2"
5943 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
5944 [(set_attr "type" "sseishft")
5945 (set_attr "prefix_data16" "1")
5946 (set (attr "length_immediate")
5947 (if_then_else (match_operand 2 "const_int_operand" "")
5948 (const_string "1")
5949 (const_string "0")))
5950 (set_attr "mode" "TI")])
5951
;; AVX left shift of the whole 128-bit value (vpslldq).  The RTL shift
;; count in operand 2 is divided by 8 before the immediate is printed
;; (presumably converting a bit count into the byte count the instruction
;; takes).
5952 (define_insn "*avx_ashlv1ti3"
5953 [(set (match_operand:V1TI 0 "register_operand" "=x")
5954 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "x")
5955 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5956 "TARGET_AVX"
5957 {
5958 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5959 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5960 }
5961 [(set_attr "type" "sseishft")
5962 (set_attr "prefix" "vex")
5963 (set_attr "length_immediate" "1")
5964 (set_attr "mode" "TI")])
5965
;; AVX left shift of each element (vpsll<ssevecsize>) for the SSEMODE248
;; modes.  The count is an xmm register or an "N" constant;
;; length_immediate is 1 only for a const_int count.
5966 (define_insn "*avx_ashl<mode>3"
5967 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5968 (ashift:SSEMODE248
5969 (match_operand:SSEMODE248 1 "register_operand" "x")
5970 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5971 "TARGET_AVX"
5972 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5973 [(set_attr "type" "sseishft")
5974 (set_attr "prefix" "vex")
5975 (set (attr "length_immediate")
5976 (if_then_else (match_operand 2 "const_int_operand" "")
5977 (const_string "1")
5978 (const_string "0")))
5979 (set_attr "mode" "TI")])
5980
;; SSE2 left shift of the whole 128-bit value (pslldq), with operand 1
;; tied to the destination.  The RTL shift count in operand 2 is divided
;; by 8 before the immediate is printed (presumably converting a bit count
;; into the byte count the instruction takes).
5981 (define_insn "sse2_ashlv1ti3"
5982 [(set (match_operand:V1TI 0 "register_operand" "=x")
5983 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "0")
5984 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5985 "TARGET_SSE2"
5986 {
5987 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5988 return "pslldq\t{%2, %0|%0, %2}";
5989 }
5990 [(set_attr "type" "sseishft")
5991 (set_attr "prefix_data16" "1")
5992 (set_attr "length_immediate" "1")
5993 (set_attr "mode" "TI")])
5994
;; SSE2 left shift of each element (psll<ssevecsize>) for the SSEMODE248
;; modes.  Operand 1 is tied to the destination ("0"); the count is an xmm
;; register or an "N" constant, and length_immediate is 1 only for a
;; const_int count.
5995 (define_insn "ashl<mode>3"
5996 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5997 (ashift:SSEMODE248
5998 (match_operand:SSEMODE248 1 "register_operand" "0")
5999 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6000 "TARGET_SSE2"
6001 "psll<ssevecsize>\t{%2, %0|%0, %2}"
6002 [(set_attr "type" "sseishft")
6003 (set_attr "prefix_data16" "1")
6004 (set (attr "length_immediate")
6005 (if_then_else (match_operand 2 "const_int_operand" "")
6006 (const_string "1")
6007 (const_string "0")))
6008 (set_attr "mode" "TI")])
6009
;; Whole-vector left shift for the 16-byte integer modes.  Operands 0 and
;; 1 are re-viewed as V1TImode with gen_lowpart before the pattern is
;; emitted, so the resulting insn is a V1TI ashift.
6010 (define_expand "vec_shl_<mode>"
6011 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6012 (ashift:V1TI
6013 (match_operand:SSEMODEI 1 "register_operand" "")
6014 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6015 "TARGET_SSE2"
6016 {
6017 operands[0] = gen_lowpart (V1TImode, operands[0]);
6018 operands[1] = gen_lowpart (V1TImode, operands[1]);
6019 })
6020
;; Whole-vector logical right shift for the 16-byte integer modes.
;; Operands 0 and 1 are re-viewed as V1TImode with gen_lowpart before the
;; pattern is emitted, so the resulting insn is a V1TI lshiftrt.
6021 (define_expand "vec_shr_<mode>"
6022 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6023 (lshiftrt:V1TI
6024 (match_operand:SSEMODEI 1 "register_operand" "")
6025 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6026 "TARGET_SSE2"
6027 {
6028 operands[0] = gen_lowpart (V1TImode, operands[0]);
6029 operands[1] = gen_lowpart (V1TImode, operands[1]);
6030 })
6031
;; AVX unsigned integer max/min (umaxmin code iterator) over the
;; SSEMODE124 modes.  prefix_extra is 0 for V16QI and 1 for the other
;; modes.
6032 (define_insn "*avx_<code><mode>3"
6033 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6034 (umaxmin:SSEMODE124
6035 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6036 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6037 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6038 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6039 [(set_attr "type" "sseiadd")
6040 (set (attr "prefix_extra")
6041 (if_then_else (match_operand:V16QI 0 "" "")
6042 (const_string "0")
6043 (const_string "1")))
6044 (set_attr "prefix" "vex")
6045 (set_attr "mode" "TI")])
6046
;; Named expanders for unsigned V16QI max/min (one per umaxmin code).
;; Operands are passed through ix86_fixup_binary_operands_no_copy before
;; the pattern is emitted.
6047 (define_expand "<code>v16qi3"
6048 [(set (match_operand:V16QI 0 "register_operand" "")
6049 (umaxmin:V16QI
6050 (match_operand:V16QI 1 "nonimmediate_operand" "")
6051 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
6052 "TARGET_SSE2"
6053 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
6054
;; SSE2 unsigned V16QI max/min (p<maxmin_int>b).  Operand 1 is tied to the
;; destination ("0") and commutative with operand 2 ("%").
6055 (define_insn "*<code>v16qi3"
6056 [(set (match_operand:V16QI 0 "register_operand" "=x")
6057 (umaxmin:V16QI
6058 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
6059 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
6060 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6061 "p<maxmin_int>b\t{%2, %0|%0, %2}"
6062 [(set_attr "type" "sseiadd")
6063 (set_attr "prefix_data16" "1")
6064 (set_attr "mode" "TI")])
6065
;; AVX signed integer max/min (smaxmin code iterator) over the SSEMODE124
;; modes.  prefix_extra is 0 for V8HI and 1 for the other modes.
6066 (define_insn "*avx_<code><mode>3"
6067 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6068 (smaxmin:SSEMODE124
6069 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6070 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6071 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6072 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6073 [(set_attr "type" "sseiadd")
6074 (set (attr "prefix_extra")
6075 (if_then_else (match_operand:V8HI 0 "" "")
6076 (const_string "0")
6077 (const_string "1")))
6078 (set_attr "prefix" "vex")
6079 (set_attr "mode" "TI")])
6080
;; Named expanders for signed V8HI max/min (one per smaxmin code).
;; Operands are passed through ix86_fixup_binary_operands_no_copy before
;; the pattern is emitted.
6081 (define_expand "<code>v8hi3"
6082 [(set (match_operand:V8HI 0 "register_operand" "")
6083 (smaxmin:V8HI
6084 (match_operand:V8HI 1 "nonimmediate_operand" "")
6085 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6086 "TARGET_SSE2"
6087 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
6088
;; SSE2 signed V8HI max/min (p<maxmin_int>w).  Operand 1 is tied to the
;; destination ("0") and commutative with operand 2 ("%").
6089 (define_insn "*<code>v8hi3"
6090 [(set (match_operand:V8HI 0 "register_operand" "=x")
6091 (smaxmin:V8HI
6092 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
6093 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
6094 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6095 "p<maxmin_int>w\t{%2, %0|%0, %2}"
6096 [(set_attr "type" "sseiadd")
6097 (set_attr "prefix_data16" "1")
6098 (set_attr "mode" "TI")])
6099
;; Unsigned V8HI max.  With SSE4.1 the umax pattern is emitted directly
;; after operand fixup.  Otherwise it is synthesized as an unsigned
;; saturating subtract (op1 - op2) followed by adding op2 back.  The
;; subtract normally writes straight into operand 0, but a fresh temporary
;; is used when operand 0 is the same rtx as operand 2, since op2 is still
;; needed by the following add.
6100 (define_expand "umaxv8hi3"
6101 [(set (match_operand:V8HI 0 "register_operand" "")
6102 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
6103 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6104 "TARGET_SSE2"
6105 {
6106 if (TARGET_SSE4_1)
6107 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
6108 else
6109 {
6110 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6111 if (rtx_equal_p (op3, op2))
6112 op3 = gen_reg_rtx (V8HImode);
6113 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6114 emit_insn (gen_addv8hi3 (op0, op3, op2));
6115 DONE;
6116 }
6117 })
6118
;; Signed max for the SSEMODE14 modes.  With SSE4.1 the smax pattern is
;; emitted directly after operand fixup.  Otherwise an integer vector
;; conditional is built via ix86_expand_int_vcond with the comparison
;; op1 > op2 and value operands (op1, op2) -- presumably selecting op1
;; where the comparison holds and op2 elsewhere.  The expansion is
;; asserted to succeed.
6119 (define_expand "smax<mode>3"
6120 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6121 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6122 (match_operand:SSEMODE14 2 "register_operand" "")))]
6123 "TARGET_SSE2"
6124 {
6125 if (TARGET_SSE4_1)
6126 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
6127 else
6128 {
6129 rtx xops[6];
6130 bool ok;
6131
6132 xops[0] = operands[0];
6133 xops[1] = operands[1];
6134 xops[2] = operands[2];
6135 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6136 xops[4] = operands[1];
6137 xops[5] = operands[2];
6138 ok = ix86_expand_int_vcond (xops);
6139 gcc_assert (ok);
6140 DONE;
6141 }
6142 })
6143
;; SSE4.1 signed max/min for the SSEMODE14 modes
;; (p<maxmin_int><ssevecsize>).  Operand 1 is tied to the destination
;; ("0") and commutative with operand 2 ("%").
6144 (define_insn "*sse4_1_<code><mode>3"
6145 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
6146 (smaxmin:SSEMODE14
6147 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
6148 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
6149 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6150 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6151 [(set_attr "type" "sseiadd")
6152 (set_attr "prefix_extra" "1")
6153 (set_attr "mode" "TI")])
6154
;; Signed V2DI max (requires SSE4.2).  Always expanded through
;; ix86_expand_int_vcond with the comparison op1 > op2 and value operands
;; (op1, op2) -- presumably selecting op1 where the comparison holds and
;; op2 elsewhere.  The expansion is asserted to succeed.
6155 (define_expand "smaxv2di3"
6156 [(set (match_operand:V2DI 0 "register_operand" "")
6157 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
6158 (match_operand:V2DI 2 "register_operand" "")))]
6159 "TARGET_SSE4_2"
6160 {
6161 rtx xops[6];
6162 bool ok;
6163
6164 xops[0] = operands[0];
6165 xops[1] = operands[1];
6166 xops[2] = operands[2];
6167 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6168 xops[4] = operands[1];
6169 xops[5] = operands[2];
6170 ok = ix86_expand_int_vcond (xops);
6171 gcc_assert (ok);
6172 DONE;
6173 })
6174
;; Unsigned V4SI max.  With SSE4.1 the umax pattern is emitted directly
;; after operand fixup.  Otherwise an integer vector conditional is built
;; via ix86_expand_int_vcond with the unsigned comparison op1 >u op2 and
;; value operands (op1, op2) -- presumably selecting op1 where the
;; comparison holds and op2 elsewhere.
6175 (define_expand "umaxv4si3"
6176 [(set (match_operand:V4SI 0 "register_operand" "")
6177 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
6178 (match_operand:V4SI 2 "register_operand" "")))]
6179 "TARGET_SSE2"
6180 {
6181 if (TARGET_SSE4_1)
6182 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
6183 else
6184 {
6185 rtx xops[6];
6186 bool ok;
6187
6188 xops[0] = operands[0];
6189 xops[1] = operands[1];
6190 xops[2] = operands[2];
6191 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6192 xops[4] = operands[1];
6193 xops[5] = operands[2];
6194 ok = ix86_expand_int_vcond (xops);
6195 gcc_assert (ok);
6196 DONE;
6197 }
6198 })
6199
;; SSE4.1 unsigned max/min for the SSEMODE24 modes
;; (p<maxmin_int><ssevecsize>).  Operand 1 is tied to the destination
;; ("0") and commutative with operand 2 ("%").
6200 (define_insn "*sse4_1_<code><mode>3"
6201 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6202 (umaxmin:SSEMODE24
6203 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
6204 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
6205 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6206 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6207 [(set_attr "type" "sseiadd")
6208 (set_attr "prefix_extra" "1")
6209 (set_attr "mode" "TI")])
6210
;; Unsigned V2DI max (requires SSE4.2).  Always expanded through
;; ix86_expand_int_vcond with the unsigned comparison op1 >u op2 and value
;; operands (op1, op2) -- presumably selecting op1 where the comparison
;; holds and op2 elsewhere.  The expansion is asserted to succeed.
6211 (define_expand "umaxv2di3"
6212 [(set (match_operand:V2DI 0 "register_operand" "")
6213 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
6214 (match_operand:V2DI 2 "register_operand" "")))]
6215 "TARGET_SSE4_2"
6216 {
6217 rtx xops[6];
6218 bool ok;
6219
6220 xops[0] = operands[0];
6221 xops[1] = operands[1];
6222 xops[2] = operands[2];
6223 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6224 xops[4] = operands[1];
6225 xops[5] = operands[2];
6226 ok = ix86_expand_int_vcond (xops);
6227 gcc_assert (ok);
6228 DONE;
6229 })
6230
;; Signed min for the SSEMODE14 modes.  With SSE4.1 the smin pattern is
;; emitted directly after operand fixup.  Otherwise an integer vector
;; conditional is built via ix86_expand_int_vcond with the comparison
;; op1 > op2 and the value operands swapped (op2, op1) -- presumably
;; selecting op2 where op1 > op2 and op1 elsewhere.
6231 (define_expand "smin<mode>3"
6232 [(set (match_operand:SSEMODE14 0 "register_operand" "")
6233 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
6234 (match_operand:SSEMODE14 2 "register_operand" "")))]
6235 "TARGET_SSE2"
6236 {
6237 if (TARGET_SSE4_1)
6238 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
6239 else
6240 {
6241 rtx xops[6];
6242 bool ok;
6243
6244 xops[0] = operands[0];
6245 xops[1] = operands[2];
6246 xops[2] = operands[1];
6247 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6248 xops[4] = operands[1];
6249 xops[5] = operands[2];
6250 ok = ix86_expand_int_vcond (xops);
6251 gcc_assert (ok);
6252 DONE;
6253 }
6254 })
6255
;; Signed V2DI min (requires SSE4.2).  Always expanded through
;; ix86_expand_int_vcond with the comparison op1 > op2 and the value
;; operands swapped (op2, op1) -- presumably selecting op2 where
;; op1 > op2 and op1 elsewhere.  The expansion is asserted to succeed.
6256 (define_expand "sminv2di3"
6257 [(set (match_operand:V2DI 0 "register_operand" "")
6258 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
6259 (match_operand:V2DI 2 "register_operand" "")))]
6260 "TARGET_SSE4_2"
6261 {
6262 rtx xops[6];
6263 bool ok;
6264
6265 xops[0] = operands[0];
6266 xops[1] = operands[2];
6267 xops[2] = operands[1];
6268 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6269 xops[4] = operands[1];
6270 xops[5] = operands[2];
6271 ok = ix86_expand_int_vcond (xops);
6272 gcc_assert (ok);
6273 DONE;
6274 })
6275
;; Unsigned min for the SSEMODE24 modes.  With SSE4.1 the umin pattern is
;; emitted directly after operand fixup.  Otherwise an integer vector
;; conditional is built via ix86_expand_int_vcond with the unsigned
;; comparison op1 >u op2 and the value operands swapped (op2, op1) --
;; presumably selecting op2 where op1 >u op2 and op1 elsewhere.
6276 (define_expand "umin<mode>3"
6277 [(set (match_operand:SSEMODE24 0 "register_operand" "")
6278 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
6279 (match_operand:SSEMODE24 2 "register_operand" "")))]
6280 "TARGET_SSE2"
6281 {
6282 if (TARGET_SSE4_1)
6283 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
6284 else
6285 {
6286 rtx xops[6];
6287 bool ok;
6288
6289 xops[0] = operands[0];
6290 xops[1] = operands[2];
6291 xops[2] = operands[1];
6292 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6293 xops[4] = operands[1];
6294 xops[5] = operands[2];
6295 ok = ix86_expand_int_vcond (xops);
6296 gcc_assert (ok);
6297 DONE;
6298 }
6299 })
6300
;; Unsigned V2DI min (requires SSE4.2).  Always expanded through
;; ix86_expand_int_vcond with the unsigned comparison op1 >u op2 and the
;; value operands swapped (op2, op1) -- presumably selecting op2 where
;; op1 >u op2 and op1 elsewhere.  The expansion is asserted to succeed.
6301 (define_expand "uminv2di3"
6302 [(set (match_operand:V2DI 0 "register_operand" "")
6303 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
6304 (match_operand:V2DI 2 "register_operand" "")))]
6305 "TARGET_SSE4_2"
6306 {
6307 rtx xops[6];
6308 bool ok;
6309
6310 xops[0] = operands[0];
6311 xops[1] = operands[2];
6312 xops[2] = operands[1];
6313 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6314 xops[4] = operands[1];
6315 xops[5] = operands[2];
6316 ok = ix86_expand_int_vcond (xops);
6317 gcc_assert (ok);
6318 DONE;
6319 })
6320
6321 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6322 ;;
6323 ;; Parallel integral comparisons
6324 ;;
6325 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6326
;; Expander for element-wise equality comparison over the SSEMODE124
;; modes.  Enabled for SSE2 only when XOP is not in use; operands are
;; passed through ix86_fixup_binary_operands_no_copy before the pattern is
;; emitted.
6327 (define_expand "sse2_eq<mode>3"
6328 [(set (match_operand:SSEMODE124 0 "register_operand" "")
6329 (eq:SSEMODE124
6330 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
6331 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
6332 "TARGET_SSE2 && !TARGET_XOP "
6333 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
6334
;; AVX element-wise equality comparison (vpcmpeq<ssevecsize>) over the
;; SSEMODE1248 modes.  prefix_extra is forced to 1 for V2DI and left at
;; its default ("*") for the other modes.
6335 (define_insn "*avx_eq<mode>3"
6336 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6337 (eq:SSEMODE1248
6338 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
6339 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6340 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6341 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6342 [(set_attr "type" "ssecmp")
6343 (set (attr "prefix_extra")
6344 (if_then_else (match_operand:V2DI 0 "" "")
6345 (const_string "1")
6346 (const_string "*")))
6347 (set_attr "prefix" "vex")
6348 (set_attr "mode" "TI")])
6349
;; SSE2 form of element-wise integer equality for SSEMODE124 vectors.
;; Emits two-operand pcmpeq<ssevecsize>: operand 1 is tied to the
;; destination ("%0", commutative with operand 2) and only %0/%2 are
;; printed.  Requires TARGET_SSE2, no TARGET_XOP, and
;; ix86_binary_operator_ok.
6350 (define_insn "*sse2_eq<mode>3"
6351 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6352 (eq:SSEMODE124
6353 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
6354 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6355 "TARGET_SSE2 && !TARGET_XOP
6356 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6357 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
6358 [(set_attr "type" "ssecmp")
6359 (set_attr "prefix_data16" "1")
6360 (set_attr "mode" "TI")])
6361
;; Named expander for element-wise equality of two V2DI vectors, enabled
;; for TARGET_SSE4_1.  The preparation statement only calls
;; ix86_fixup_binary_operands_no_copy; the (eq:V2DI ...) set is then
;; emitted as written.
6362 (define_expand "sse4_1_eqv2di3"
6363 [(set (match_operand:V2DI 0 "register_operand" "")
6364 (eq:V2DI
6365 (match_operand:V2DI 1 "nonimmediate_operand" "")
6366 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6367 "TARGET_SSE4_1"
6368 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
6369
;; SSE4.1 V2DI equality.  Emits two-operand pcmpeqq with operand 1 tied to
;; the destination ("%0", commutative with operand 2).  Requires
;; TARGET_SSE4_1 and ix86_binary_operator_ok.  prefix_extra is "1".
6370 (define_insn "*sse4_1_eqv2di3"
6371 [(set (match_operand:V2DI 0 "register_operand" "=x")
6372 (eq:V2DI
6373 (match_operand:V2DI 1 "nonimmediate_operand" "%0")
6374 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6375 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6376 "pcmpeqq\t{%2, %0|%0, %2}"
6377 [(set_attr "type" "ssecmp")
6378 (set_attr "prefix_extra" "1")
6379 (set_attr "mode" "TI")])
6380
;; AVX form of element-wise signed greater-than (gt) for SSEMODE1248
;; vectors.  Emits three-operand vpcmpgt<ssevecsize>.  Unlike the eq
;; patterns, operand 1 has no "%" modifier and must be a register; only
;; operand 2 may be memory.  The sole condition is TARGET_AVX.
;; prefix_extra is "1" only when operand 0 is V2DI; otherwise it is "*".
6381 (define_insn "*avx_gt<mode>3"
6382 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
6383 (gt:SSEMODE1248
6384 (match_operand:SSEMODE1248 1 "register_operand" "x")
6385 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
6386 "TARGET_AVX"
6387 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6388 [(set_attr "type" "ssecmp")
6389 (set (attr "prefix_extra")
6390 (if_then_else (match_operand:V2DI 0 "" "")
6391 (const_string "1")
6392 (const_string "*")))
6393 (set_attr "prefix" "vex")
6394 (set_attr "mode" "TI")])
6395
;; SSE2 form of element-wise greater-than (gt) for SSEMODE124 vectors.
;; Emits two-operand pcmpgt<ssevecsize>; operand 1 is tied to the
;; destination ("0") and operand 2 may be register or memory.
;; Enabled for TARGET_SSE2 when TARGET_XOP is off.
6396 (define_insn "sse2_gt<mode>3"
6397 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6398 (gt:SSEMODE124
6399 (match_operand:SSEMODE124 1 "register_operand" "0")
6400 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6401 "TARGET_SSE2 && !TARGET_XOP"
6402 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
6403 [(set_attr "type" "ssecmp")
6404 (set_attr "prefix_data16" "1")
6405 (set_attr "mode" "TI")])
6406
;; SSE4.2 V2DI greater-than (gt).  Emits two-operand pcmpgtq with operand 1
;; tied to the destination ("0"); operand 2 may be register or memory.
;; Enabled for TARGET_SSE4_2.  prefix_extra is "1".
6407 (define_insn "sse4_2_gtv2di3"
6408 [(set (match_operand:V2DI 0 "register_operand" "=x")
6409 (gt:V2DI
6410 (match_operand:V2DI 1 "register_operand" "0")
6411 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
6412 "TARGET_SSE4_2"
6413 "pcmpgtq\t{%2, %0|%0, %2}"
6414 [(set_attr "type" "ssecmp")
6415 (set_attr "prefix_extra" "1")
6416 (set_attr "mode" "TI")])
6417
;; Vector conditional select for SSEMODE124C8 modes, enabled for
;; TARGET_SSE2.  Operand 3 is the comparison operator applied to operands
;; 4 and 5; operands 1 and 2 are the two values of the if_then_else and
;; operand 0 is the destination.  All of the work is delegated to
;; ix86_expand_int_vcond, which is asserted to succeed.
(define_expand "vcond<mode>"
  [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
        (if_then_else:SSEMODE124C8
          (match_operator 3 ""
            [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
             (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
          (match_operand:SSEMODE124C8 1 "general_operand" "")
          (match_operand:SSEMODE124C8 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  bool expanded;

  /* Call first, assert afterwards: the call must always be evaluated.  */
  expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
6432
;; Counterpart of vcond<mode> under the name vcondu<mode>, for
;; SSEMODE124C8 modes and TARGET_SSE2.  The RTL template and the body are
;; the same as in vcond<mode>: operand 3 compares operands 4 and 5,
;; operands 1 and 2 are the selected values, and the expansion is handed
;; to ix86_expand_int_vcond, which is asserted to succeed.
(define_expand "vcondu<mode>"
  [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
        (if_then_else:SSEMODE124C8
          (match_operator 3 ""
            [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
             (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
          (match_operand:SSEMODE124C8 1 "general_operand" "")
          (match_operand:SSEMODE124C8 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  bool expanded;

  /* Call first, assert afterwards: the call must always be evaluated.  */
  expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
6447
6448 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6449 ;;
6450 ;; Parallel bitwise logical operations
6451 ;;
6452 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6453
;; One's complement of an SSEMODEI vector, enabled for TARGET_SSE2.
;; Expressed as an XOR of operand 1 with operand 2, where operand 2 is
;; created here: a constant vector whose every element is constm1_rtx,
;; forced into a register.
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_dup 2)))]
  "TARGET_SSE2"
{
  const int nunits = GET_MODE_NUNITS (<MODE>mode);
  rtvec all_ones = rtvec_alloc (nunits);
  rtx mask;
  int elt = nunits;

  /* Fill every element of the vector with -1.  */
  while (elt-- > 0)
    RTVEC_ELT (all_ones, elt) = constm1_rtx;

  mask = gen_rtx_CONST_VECTOR (<MODE>mode, all_ones);
  operands[2] = force_reg (<MODE>mode, mask);
})
6468
;; AVX and-not for the 32-byte integer modes in AVX256MODEI:
;; operand 0 = (~operand 1) & operand 2.  Emits three-operand vandnps,
;; with the insn "mode" attribute taken from <avxvecpsmode>.  Operand 1
;; must be a register; operand 2 may be register or memory.
6469 (define_insn "*avx_andnot<mode>3"
6470 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6471 (and:AVX256MODEI
6472 (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
6473 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6474 "TARGET_AVX"
6475 "vandnps\t{%2, %1, %0|%0, %1, %2}"
6476 [(set_attr "type" "sselog")
6477 (set_attr "prefix" "vex")
6478 (set_attr "mode" "<avxvecpsmode>")])
6479
;; And-not for SSEMODEI vectors when only SSE (not SSE2) is available:
;; operand 0 = (~operand 1) & operand 2.  Emits two-operand andnps with
;; operand 1 tied to the destination ("0"); insn mode is V4SF.
6480 (define_insn "*sse_andnot<mode>3"
6481 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6482 (and:SSEMODEI
6483 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6484 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6485 "(TARGET_SSE && !TARGET_SSE2)"
6486 "andnps\t{%2, %0|%0, %2}"
6487 [(set_attr "type" "sselog")
6488 (set_attr "mode" "V4SF")])
6489
;; AVX and-not for the 16-byte integer modes in SSEMODEI:
;; operand 0 = (~operand 1) & operand 2.  Emits three-operand vpandn.
;; Operand 1 must be a register; operand 2 may be register or memory.
6490 (define_insn "*avx_andnot<mode>3"
6491 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6492 (and:SSEMODEI
6493 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
6494 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6495 "TARGET_AVX"
6496 "vpandn\t{%2, %1, %0|%0, %1, %2}"
6497 [(set_attr "type" "sselog")
6498 (set_attr "prefix" "vex")
6499 (set_attr "mode" "TI")])
6500
;; SSE2 and-not for SSEMODEI vectors:
;; operand 0 = (~operand 1) & operand 2.  Emits two-operand pandn with
;; operand 1 tied to the destination ("0").  Enabled for TARGET_SSE2.
6501 (define_insn "sse2_andnot<mode>3"
6502 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6503 (and:SSEMODEI
6504 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
6505 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6506 "TARGET_SSE2"
6507 "pandn\t{%2, %0|%0, %2}"
6508 [(set_attr "type" "sselog")
6509 (set_attr "prefix_data16" "1")
6510 (set_attr "mode" "TI")])
6511
;; And-not on TFmode values held in SSE registers:
;; operand 0 = (~operand 1) & operand 2.  Emits the same two-operand pandn
;; as the vector pattern, with operand 1 tied to the destination ("0").
;; Enabled for TARGET_SSE2.
6512 (define_insn "*andnottf3"
6513 [(set (match_operand:TF 0 "register_operand" "=x")
6514 (and:TF
6515 (not:TF (match_operand:TF 1 "register_operand" "0"))
6516 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6517 "TARGET_SSE2"
6518 "pandn\t{%2, %0|%0, %2}"
6519 [(set_attr "type" "sselog")
6520 (set_attr "prefix_data16" "1")
6521 (set_attr "mode" "TI")])
6522
;; Named expander for every code in the any_logic iterator on SSEMODEI
;; vectors, enabled for TARGET_SSE.  The preparation statement only calls
;; ix86_fixup_binary_operands_no_copy with the iterated <CODE>; the
;; pattern is then emitted as written.
6523 (define_expand "<code><mode>3"
6524 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6525 (any_logic:SSEMODEI
6526 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
6527 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
6528 "TARGET_SSE"
6529 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
6530
;; AVX any_logic operation on the 32-byte integer modes in AVX256MODEI.
;; Emits three-operand v<logic>ps, with the insn "mode" attribute taken
;; from <avxvecpsmode>.  Operand 1 carries "%" (commutative with operand
;; 2); operand 2 may be register or memory.  Requires TARGET_AVX and
;; ix86_binary_operator_ok.
6531 (define_insn "*avx_<code><mode>3"
6532 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
6533 (any_logic:AVX256MODEI
6534 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
6535 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
6536 "TARGET_AVX
6537 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6538 "v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
6539 [(set_attr "type" "sselog")
6540 (set_attr "prefix" "vex")
6541 (set_attr "mode" "<avxvecpsmode>")])
6542
;; any_logic operation on SSEMODEI vectors when only SSE (not SSE2) is
;; available.  Emits two-operand <logic>ps with operand 1 tied to the
;; destination ("%0", commutative with operand 2); insn mode is V4SF.
;; Also requires ix86_binary_operator_ok.
6543 (define_insn "*sse_<code><mode>3"
6544 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6545 (any_logic:SSEMODEI
6546 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6547 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6548 "(TARGET_SSE && !TARGET_SSE2)
6549 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6550 "<logic>ps\t{%2, %0|%0, %2}"
6551 [(set_attr "type" "sselog")
6552 (set_attr "mode" "V4SF")])
6553
;; AVX any_logic operation on the 16-byte integer modes in SSEMODEI.
;; Emits three-operand vp<logic>.  Operand 1 carries "%" (commutative
;; with operand 2); operand 2 may be register or memory.  Requires
;; TARGET_AVX and ix86_binary_operator_ok.
6554 (define_insn "*avx_<code><mode>3"
6555 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6556 (any_logic:SSEMODEI
6557 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
6558 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6559 "TARGET_AVX
6560 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6561 "vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6562 [(set_attr "type" "sselog")
6563 (set_attr "prefix" "vex")
6564 (set_attr "mode" "TI")])
6565
;; SSE2 any_logic operation on SSEMODEI vectors.  Emits two-operand
;; p<logic> with operand 1 tied to the destination ("%0", commutative with
;; operand 2).  Requires TARGET_SSE2 and ix86_binary_operator_ok.
6566 (define_insn "*sse2_<code><mode>3"
6567 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
6568 (any_logic:SSEMODEI
6569 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
6570 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
6571 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6572 "p<logic>\t{%2, %0|%0, %2}"
6573 [(set_attr "type" "sselog")
6574 (set_attr "prefix_data16" "1")
6575 (set_attr "mode" "TI")])
6576
;; Named expander for every code in the any_logic iterator on TFmode
;; values, enabled for TARGET_SSE2.  The preparation statement only calls
;; ix86_fixup_binary_operands_no_copy; the pattern is then emitted as
;; written.
6577 (define_expand "<code>tf3"
6578 [(set (match_operand:TF 0 "register_operand" "")
6579 (any_logic:TF
6580 (match_operand:TF 1 "nonimmediate_operand" "")
6581 (match_operand:TF 2 "nonimmediate_operand" "")))]
6582 "TARGET_SSE2"
6583 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
6584
;; any_logic operation on TFmode values held in SSE registers.  Emits
;; two-operand p<logic> with operand 1 tied to the destination ("%0",
;; commutative with operand 2).  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.
6585 (define_insn "*<code>tf3"
6586 [(set (match_operand:TF 0 "register_operand" "=x")
6587 (any_logic:TF
6588 (match_operand:TF 1 "nonimmediate_operand" "%0")
6589 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6590 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6591 "p<logic>\t{%2, %0|%0, %2}"
6592 [(set_attr "type" "sselog")
6593 (set_attr "prefix_data16" "1")
6594 (set_attr "mode" "TI")])
6595
6596 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6597 ;;
6598 ;; Parallel integral element swizzling
6599 ;;
6600 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6601
;; Pack two V8HI registers into one V16QI register, enabled for
;; TARGET_SSE2.  Both inputs are reinterpreted as V16QI with gen_lowpart
;; and handed to ix86_expand_vec_extract_even_odd with a final argument
;; of 0.
;; NOTE(review): the 0 presumably selects the even-numbered elements;
;; confirm against ix86_expand_vec_extract_even_odd.
(define_expand "vec_pack_trunc_v8hi"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx first_as_qi, second_as_qi;

  first_as_qi = gen_lowpart (V16QImode, operands[1]);
  second_as_qi = gen_lowpart (V16QImode, operands[2]);
  ix86_expand_vec_extract_even_odd (operands[0],
                                    first_as_qi, second_as_qi, 0);
  DONE;
})
6613
;; Pack two V4SI registers into one V8HI register, enabled for
;; TARGET_SSE2.  Both inputs are reinterpreted as V8HI with gen_lowpart
;; and handed to ix86_expand_vec_extract_even_odd with a final argument
;; of 0.
;; NOTE(review): the 0 presumably selects the even-numbered elements;
;; confirm against ix86_expand_vec_extract_even_odd.
(define_expand "vec_pack_trunc_v4si"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx first_as_hi, second_as_hi;

  first_as_hi = gen_lowpart (V8HImode, operands[1]);
  second_as_hi = gen_lowpart (V8HImode, operands[2]);
  ix86_expand_vec_extract_even_odd (operands[0],
                                    first_as_hi, second_as_hi, 0);
  DONE;
})
6625
;; Pack two V2DI registers into one V4SI register, enabled for
;; TARGET_SSE2.  Both inputs are reinterpreted as V4SI with gen_lowpart
;; and handed to ix86_expand_vec_extract_even_odd with a final argument
;; of 0.
;; NOTE(review): the 0 presumably selects the even-numbered elements;
;; confirm against ix86_expand_vec_extract_even_odd.
(define_expand "vec_pack_trunc_v2di"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DI 1 "register_operand" "")
   (match_operand:V2DI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx first_as_si, second_as_si;

  first_as_si = gen_lowpart (V4SImode, operands[1]);
  second_as_si = gen_lowpart (V4SImode, operands[2]);
  ix86_expand_vec_extract_even_odd (operands[0],
                                    first_as_si, second_as_si, 0);
  DONE;
})
6637
;; AVX pack with signed saturation, words to bytes.  The V16QI result is
;; the concatenation of ss_truncate (operand 1) and ss_truncate
;; (operand 2), each V8HI -> V8QI.  Emits three-operand vpacksswb;
;; operand 1 must be a register, operand 2 may be register or memory.
6638 (define_insn "*avx_packsswb"
6639 [(set (match_operand:V16QI 0 "register_operand" "=x")
6640 (vec_concat:V16QI
6641 (ss_truncate:V8QI
6642 (match_operand:V8HI 1 "register_operand" "x"))
6643 (ss_truncate:V8QI
6644 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6645 "TARGET_AVX"
6646 "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6647 [(set_attr "type" "sselog")
6648 (set_attr "prefix" "vex")
6649 (set_attr "mode" "TI")])
6650
;; SSE2 pack with signed saturation, words to bytes.  The V16QI result is
;; the concatenation of ss_truncate (operand 1) and ss_truncate
;; (operand 2), each V8HI -> V8QI.  Emits two-operand packsswb with
;; operand 1 tied to the destination ("0").
6651 (define_insn "sse2_packsswb"
6652 [(set (match_operand:V16QI 0 "register_operand" "=x")
6653 (vec_concat:V16QI
6654 (ss_truncate:V8QI
6655 (match_operand:V8HI 1 "register_operand" "0"))
6656 (ss_truncate:V8QI
6657 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6658 "TARGET_SSE2"
6659 "packsswb\t{%2, %0|%0, %2}"
6660 [(set_attr "type" "sselog")
6661 (set_attr "prefix_data16" "1")
6662 (set_attr "mode" "TI")])
6663
;; AVX pack with signed saturation, doublewords to words.  The V8HI result
;; is the concatenation of ss_truncate (operand 1) and ss_truncate
;; (operand 2), each V4SI -> V4HI.  Emits three-operand vpackssdw;
;; operand 1 must be a register, operand 2 may be register or memory.
6664 (define_insn "*avx_packssdw"
6665 [(set (match_operand:V8HI 0 "register_operand" "=x")
6666 (vec_concat:V8HI
6667 (ss_truncate:V4HI
6668 (match_operand:V4SI 1 "register_operand" "x"))
6669 (ss_truncate:V4HI
6670 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6671 "TARGET_AVX"
6672 "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6673 [(set_attr "type" "sselog")
6674 (set_attr "prefix" "vex")
6675 (set_attr "mode" "TI")])
6676
;; SSE2 pack with signed saturation, doublewords to words.  The V8HI
;; result is the concatenation of ss_truncate (operand 1) and ss_truncate
;; (operand 2), each V4SI -> V4HI.  Emits two-operand packssdw with
;; operand 1 tied to the destination ("0").
6677 (define_insn "sse2_packssdw"
6678 [(set (match_operand:V8HI 0 "register_operand" "=x")
6679 (vec_concat:V8HI
6680 (ss_truncate:V4HI
6681 (match_operand:V4SI 1 "register_operand" "0"))
6682 (ss_truncate:V4HI
6683 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
6684 "TARGET_SSE2"
6685 "packssdw\t{%2, %0|%0, %2}"
6686 [(set_attr "type" "sselog")
6687 (set_attr "prefix_data16" "1")
6688 (set_attr "mode" "TI")])
6689
;; AVX pack with unsigned saturation, words to bytes.  The V16QI result is
;; the concatenation of us_truncate (operand 1) and us_truncate
;; (operand 2), each V8HI -> V8QI.  Emits three-operand vpackuswb;
;; operand 1 must be a register, operand 2 may be register or memory.
6690 (define_insn "*avx_packuswb"
6691 [(set (match_operand:V16QI 0 "register_operand" "=x")
6692 (vec_concat:V16QI
6693 (us_truncate:V8QI
6694 (match_operand:V8HI 1 "register_operand" "x"))
6695 (us_truncate:V8QI
6696 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6697 "TARGET_AVX"
6698 "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6699 [(set_attr "type" "sselog")
6700 (set_attr "prefix" "vex")
6701 (set_attr "mode" "TI")])
6702
;; SSE2 pack with unsigned saturation, words to bytes.  The V16QI result
;; is the concatenation of us_truncate (operand 1) and us_truncate
;; (operand 2), each V8HI -> V8QI.  Emits two-operand packuswb with
;; operand 1 tied to the destination ("0").
6703 (define_insn "sse2_packuswb"
6704 [(set (match_operand:V16QI 0 "register_operand" "=x")
6705 (vec_concat:V16QI
6706 (us_truncate:V8QI
6707 (match_operand:V8HI 1 "register_operand" "0"))
6708 (us_truncate:V8QI
6709 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
6710 "TARGET_SSE2"
6711 "packuswb\t{%2, %0|%0, %2}"
6712 [(set_attr "type" "sselog")
6713 (set_attr "prefix_data16" "1")
6714 (set_attr "mode" "TI")])
6715
;; AVX interleave of the high bytes of two V16QI vectors.  From the
;; 32-element concatenation of operands 1 and 2 the pattern selects
;; 8,24, 9,25, ..., 15,31: elements 8..15 of operand 1 alternating with
;; elements 8..15 of operand 2.  Emits three-operand vpunpckhbw.
6716 (define_insn "*avx_interleave_highv16qi"
6717 [(set (match_operand:V16QI 0 "register_operand" "=x")
6718 (vec_select:V16QI
6719 (vec_concat:V32QI
6720 (match_operand:V16QI 1 "register_operand" "x")
6721 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6722 (parallel [(const_int 8) (const_int 24)
6723 (const_int 9) (const_int 25)
6724 (const_int 10) (const_int 26)
6725 (const_int 11) (const_int 27)
6726 (const_int 12) (const_int 28)
6727 (const_int 13) (const_int 29)
6728 (const_int 14) (const_int 30)
6729 (const_int 15) (const_int 31)])))]
6730 "TARGET_AVX"
6731 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6732 [(set_attr "type" "sselog")
6733 (set_attr "prefix" "vex")
6734 (set_attr "mode" "TI")])
6735
;; SSE2 interleave of the high bytes of two V16QI vectors.  From the
;; 32-element concatenation of operands 1 and 2 the pattern selects
;; 8,24, 9,25, ..., 15,31: elements 8..15 of operand 1 alternating with
;; elements 8..15 of operand 2.  Emits two-operand punpckhbw with
;; operand 1 tied to the destination ("0").
6736 (define_insn "vec_interleave_highv16qi"
6737 [(set (match_operand:V16QI 0 "register_operand" "=x")
6738 (vec_select:V16QI
6739 (vec_concat:V32QI
6740 (match_operand:V16QI 1 "register_operand" "0")
6741 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6742 (parallel [(const_int 8) (const_int 24)
6743 (const_int 9) (const_int 25)
6744 (const_int 10) (const_int 26)
6745 (const_int 11) (const_int 27)
6746 (const_int 12) (const_int 28)
6747 (const_int 13) (const_int 29)
6748 (const_int 14) (const_int 30)
6749 (const_int 15) (const_int 31)])))]
6750 "TARGET_SSE2"
6751 "punpckhbw\t{%2, %0|%0, %2}"
6752 [(set_attr "type" "sselog")
6753 (set_attr "prefix_data16" "1")
6754 (set_attr "mode" "TI")])
6755
;; AVX interleave of the low bytes of two V16QI vectors.  From the
;; 32-element concatenation of operands 1 and 2 the pattern selects
;; 0,16, 1,17, ..., 7,23: elements 0..7 of operand 1 alternating with
;; elements 0..7 of operand 2.  Emits three-operand vpunpcklbw.
6756 (define_insn "*avx_interleave_lowv16qi"
6757 [(set (match_operand:V16QI 0 "register_operand" "=x")
6758 (vec_select:V16QI
6759 (vec_concat:V32QI
6760 (match_operand:V16QI 1 "register_operand" "x")
6761 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6762 (parallel [(const_int 0) (const_int 16)
6763 (const_int 1) (const_int 17)
6764 (const_int 2) (const_int 18)
6765 (const_int 3) (const_int 19)
6766 (const_int 4) (const_int 20)
6767 (const_int 5) (const_int 21)
6768 (const_int 6) (const_int 22)
6769 (const_int 7) (const_int 23)])))]
6770 "TARGET_AVX"
6771 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6772 [(set_attr "type" "sselog")
6773 (set_attr "prefix" "vex")
6774 (set_attr "mode" "TI")])
6775
;; SSE2 interleave of the low bytes of two V16QI vectors.  From the
;; 32-element concatenation of operands 1 and 2 the pattern selects
;; 0,16, 1,17, ..., 7,23: elements 0..7 of operand 1 alternating with
;; elements 0..7 of operand 2.  Emits two-operand punpcklbw with
;; operand 1 tied to the destination ("0").
6776 (define_insn "vec_interleave_lowv16qi"
6777 [(set (match_operand:V16QI 0 "register_operand" "=x")
6778 (vec_select:V16QI
6779 (vec_concat:V32QI
6780 (match_operand:V16QI 1 "register_operand" "0")
6781 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6782 (parallel [(const_int 0) (const_int 16)
6783 (const_int 1) (const_int 17)
6784 (const_int 2) (const_int 18)
6785 (const_int 3) (const_int 19)
6786 (const_int 4) (const_int 20)
6787 (const_int 5) (const_int 21)
6788 (const_int 6) (const_int 22)
6789 (const_int 7) (const_int 23)])))]
6790 "TARGET_SSE2"
6791 "punpcklbw\t{%2, %0|%0, %2}"
6792 [(set_attr "type" "sselog")
6793 (set_attr "prefix_data16" "1")
6794 (set_attr "mode" "TI")])
6795
;; AVX interleave of the high words of two V8HI vectors.  From the
;; 16-element concatenation of operands 1 and 2 the pattern selects
;; 4,12, 5,13, 6,14, 7,15: elements 4..7 of operand 1 alternating with
;; elements 4..7 of operand 2.  Emits three-operand vpunpckhwd.
6796 (define_insn "*avx_interleave_highv8hi"
6797 [(set (match_operand:V8HI 0 "register_operand" "=x")
6798 (vec_select:V8HI
6799 (vec_concat:V16HI
6800 (match_operand:V8HI 1 "register_operand" "x")
6801 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6802 (parallel [(const_int 4) (const_int 12)
6803 (const_int 5) (const_int 13)
6804 (const_int 6) (const_int 14)
6805 (const_int 7) (const_int 15)])))]
6806 "TARGET_AVX"
6807 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6808 [(set_attr "type" "sselog")
6809 (set_attr "prefix" "vex")
6810 (set_attr "mode" "TI")])
6811
;; SSE2 interleave of the high words of two V8HI vectors.  From the
;; 16-element concatenation of operands 1 and 2 the pattern selects
;; 4,12, 5,13, 6,14, 7,15: elements 4..7 of operand 1 alternating with
;; elements 4..7 of operand 2.  Emits two-operand punpckhwd with
;; operand 1 tied to the destination ("0").
6812 (define_insn "vec_interleave_highv8hi"
6813 [(set (match_operand:V8HI 0 "register_operand" "=x")
6814 (vec_select:V8HI
6815 (vec_concat:V16HI
6816 (match_operand:V8HI 1 "register_operand" "0")
6817 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6818 (parallel [(const_int 4) (const_int 12)
6819 (const_int 5) (const_int 13)
6820 (const_int 6) (const_int 14)
6821 (const_int 7) (const_int 15)])))]
6822 "TARGET_SSE2"
6823 "punpckhwd\t{%2, %0|%0, %2}"
6824 [(set_attr "type" "sselog")
6825 (set_attr "prefix_data16" "1")
6826 (set_attr "mode" "TI")])
6827
;; AVX interleave of the low words of two V8HI vectors.  From the
;; 16-element concatenation of operands 1 and 2 the pattern selects
;; 0,8, 1,9, 2,10, 3,11: elements 0..3 of operand 1 alternating with
;; elements 0..3 of operand 2.  Emits three-operand vpunpcklwd.
6828 (define_insn "*avx_interleave_lowv8hi"
6829 [(set (match_operand:V8HI 0 "register_operand" "=x")
6830 (vec_select:V8HI
6831 (vec_concat:V16HI
6832 (match_operand:V8HI 1 "register_operand" "x")
6833 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6834 (parallel [(const_int 0) (const_int 8)
6835 (const_int 1) (const_int 9)
6836 (const_int 2) (const_int 10)
6837 (const_int 3) (const_int 11)])))]
6838 "TARGET_AVX"
6839 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6840 [(set_attr "type" "sselog")
6841 (set_attr "prefix" "vex")
6842 (set_attr "mode" "TI")])
6843
;; SSE2 interleave of the low words of two V8HI vectors.  From the
;; 16-element concatenation of operands 1 and 2 the pattern selects
;; 0,8, 1,9, 2,10, 3,11: elements 0..3 of operand 1 alternating with
;; elements 0..3 of operand 2.  Emits two-operand punpcklwd with
;; operand 1 tied to the destination ("0").
6844 (define_insn "vec_interleave_lowv8hi"
6845 [(set (match_operand:V8HI 0 "register_operand" "=x")
6846 (vec_select:V8HI
6847 (vec_concat:V16HI
6848 (match_operand:V8HI 1 "register_operand" "0")
6849 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6850 (parallel [(const_int 0) (const_int 8)
6851 (const_int 1) (const_int 9)
6852 (const_int 2) (const_int 10)
6853 (const_int 3) (const_int 11)])))]
6854 "TARGET_SSE2"
6855 "punpcklwd\t{%2, %0|%0, %2}"
6856 [(set_attr "type" "sselog")
6857 (set_attr "prefix_data16" "1")
6858 (set_attr "mode" "TI")])
6859
;; AVX interleave of the high doublewords of two V4SI vectors.  From the
;; 8-element concatenation of operands 1 and 2 the pattern selects
;; 2,6, 3,7: elements 2..3 of operand 1 alternating with elements 2..3 of
;; operand 2.  Emits three-operand vpunpckhdq.
6860 (define_insn "*avx_interleave_highv4si"
6861 [(set (match_operand:V4SI 0 "register_operand" "=x")
6862 (vec_select:V4SI
6863 (vec_concat:V8SI
6864 (match_operand:V4SI 1 "register_operand" "x")
6865 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6866 (parallel [(const_int 2) (const_int 6)
6867 (const_int 3) (const_int 7)])))]
6868 "TARGET_AVX"
6869 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6870 [(set_attr "type" "sselog")
6871 (set_attr "prefix" "vex")
6872 (set_attr "mode" "TI")])
6873
;; SSE2 interleave of the high doublewords of two V4SI vectors.  From the
;; 8-element concatenation of operands 1 and 2 the pattern selects
;; 2,6, 3,7: elements 2..3 of operand 1 alternating with elements 2..3 of
;; operand 2.  Emits two-operand punpckhdq with operand 1 tied to the
;; destination ("0").
6874 (define_insn "vec_interleave_highv4si"
6875 [(set (match_operand:V4SI 0 "register_operand" "=x")
6876 (vec_select:V4SI
6877 (vec_concat:V8SI
6878 (match_operand:V4SI 1 "register_operand" "0")
6879 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6880 (parallel [(const_int 2) (const_int 6)
6881 (const_int 3) (const_int 7)])))]
6882 "TARGET_SSE2"
6883 "punpckhdq\t{%2, %0|%0, %2}"
6884 [(set_attr "type" "sselog")
6885 (set_attr "prefix_data16" "1")
6886 (set_attr "mode" "TI")])
6887
;; AVX interleave of the low doublewords of two V4SI vectors.  From the
;; 8-element concatenation of operands 1 and 2 the pattern selects
;; 0,4, 1,5: elements 0..1 of operand 1 alternating with elements 0..1 of
;; operand 2.  Emits three-operand vpunpckldq.
6888 (define_insn "*avx_interleave_lowv4si"
6889 [(set (match_operand:V4SI 0 "register_operand" "=x")
6890 (vec_select:V4SI
6891 (vec_concat:V8SI
6892 (match_operand:V4SI 1 "register_operand" "x")
6893 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6894 (parallel [(const_int 0) (const_int 4)
6895 (const_int 1) (const_int 5)])))]
6896 "TARGET_AVX"
6897 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6898 [(set_attr "type" "sselog")
6899 (set_attr "prefix" "vex")
6900 (set_attr "mode" "TI")])
6901
;; SSE2 interleave of the low doublewords of two V4SI vectors.  From the
;; 8-element concatenation of operands 1 and 2 the pattern selects
;; 0,4, 1,5: elements 0..1 of operand 1 alternating with elements 0..1 of
;; operand 2.  Emits two-operand punpckldq with operand 1 tied to the
;; destination ("0").
6902 (define_insn "vec_interleave_lowv4si"
6903 [(set (match_operand:V4SI 0 "register_operand" "=x")
6904 (vec_select:V4SI
6905 (vec_concat:V8SI
6906 (match_operand:V4SI 1 "register_operand" "0")
6907 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6908 (parallel [(const_int 0) (const_int 4)
6909 (const_int 1) (const_int 5)])))]
6910 "TARGET_SSE2"
6911 "punpckldq\t{%2, %0|%0, %2}"
6912 [(set_attr "type" "sselog")
6913 (set_attr "prefix_data16" "1")
6914 (set_attr "mode" "TI")])
6915
;; AVX insertion of a scalar into one element of an SSEMODE124 vector.
;; RTL: vec_merge of (vec_duplicate operand 2) with operand 1 under mask
;; operand 3, which must satisfy const_pow2_1_to_<pinsrbits>_operand.
;; The output code replaces operand 3 with exact_log2 of its value before
;; it is printed as the immediate.  A memory source is printed as %2; any
;; other source is printed as %k2.
;; NOTE(review): %k presumably forces the 32-bit register name; confirm
;; against the operand-printing code in i386.c.
;; prefix_extra is "0" when operand 0 is V8HI and "1" otherwise.
6916 (define_insn "*avx_pinsr<ssevecsize>"
6917 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6918 (vec_merge:SSEMODE124
6919 (vec_duplicate:SSEMODE124
6920 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6921 (match_operand:SSEMODE124 1 "register_operand" "x")
6922 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6923 "TARGET_AVX"
6924 {
6925 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6926 if (MEM_P (operands[2]))
6927 return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6928 else
6929 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6930 }
6931 [(set_attr "type" "sselog")
6932 (set (attr "prefix_extra")
6933 (if_then_else (match_operand:V8HI 0 "" "")
6934 (const_string "0")
6935 (const_string "1")))
6936 (set_attr "length_immediate" "1")
6937 (set_attr "prefix" "vex")
6938 (set_attr "mode" "TI")])
6939
;; SSE4.1 insertion of a QI scalar into one element of a V16QI vector.
;; RTL: vec_merge of (vec_duplicate operand 2) with operand 1 (tied to
;; the destination, "0") under mask operand 3, which must satisfy
;; const_pow2_1_to_32768_operand.  The output code replaces operand 3
;; with exact_log2 of its value before it is printed as the immediate.
;; A memory source is printed as %2; any other source as %k2.
6940 (define_insn "*sse4_1_pinsrb"
6941 [(set (match_operand:V16QI 0 "register_operand" "=x")
6942 (vec_merge:V16QI
6943 (vec_duplicate:V16QI
6944 (match_operand:QI 2 "nonimmediate_operand" "rm"))
6945 (match_operand:V16QI 1 "register_operand" "0")
6946 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
6947 "TARGET_SSE4_1"
6948 {
6949 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6950 if (MEM_P (operands[2]))
6951 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
6952 else
6953 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6954 }
6955 [(set_attr "type" "sselog")
6956 (set_attr "prefix_extra" "1")
6957 (set_attr "length_immediate" "1")
6958 (set_attr "mode" "TI")])
6959
;; SSE2 insertion of an HI scalar into one element of a V8HI vector.
;; RTL: vec_merge of (vec_duplicate operand 2) with operand 1 (tied to
;; the destination, "0") under mask operand 3, which must satisfy
;; const_pow2_1_to_128_operand.  The output code replaces operand 3 with
;; exact_log2 of its value before it is printed as the immediate.
;; A memory source is printed as %2; any other source as %k2.
6960 (define_insn "*sse2_pinsrw"
6961 [(set (match_operand:V8HI 0 "register_operand" "=x")
6962 (vec_merge:V8HI
6963 (vec_duplicate:V8HI
6964 (match_operand:HI 2 "nonimmediate_operand" "rm"))
6965 (match_operand:V8HI 1 "register_operand" "0")
6966 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
6967 "TARGET_SSE2"
6968 {
6969 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6970 if (MEM_P (operands[2]))
6971 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
6972 else
6973 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6974 }
6975 [(set_attr "type" "sselog")
6976 (set_attr "prefix_data16" "1")
6977 (set_attr "length_immediate" "1")
6978 (set_attr "mode" "TI")])
6979
6980 ;; It must come before sse2_loadld since it is preferred.
;; SSE4.1 insertion of an SI scalar into one element of a V4SI vector.
;; RTL: vec_merge of (vec_duplicate operand 2) with operand 1 (tied to
;; the destination, "0") under mask operand 3, which must satisfy
;; const_pow2_1_to_8_operand.  The output code replaces operand 3 with
;; exact_log2 of its value and always prints operand 2 as %2.
6981 (define_insn "*sse4_1_pinsrd"
6982 [(set (match_operand:V4SI 0 "register_operand" "=x")
6983 (vec_merge:V4SI
6984 (vec_duplicate:V4SI
6985 (match_operand:SI 2 "nonimmediate_operand" "rm"))
6986 (match_operand:V4SI 1 "register_operand" "0")
6987 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
6988 "TARGET_SSE4_1"
6989 {
6990 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6991 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6992 }
6993 [(set_attr "type" "sselog")
6994 (set_attr "prefix_extra" "1")
6995 (set_attr "length_immediate" "1")
6996 (set_attr "mode" "TI")])
6997
;; AVX insertion of a DI scalar into one element of a V2DI vector; only
;; enabled for TARGET_AVX && TARGET_64BIT.
;; RTL: vec_merge of (vec_duplicate operand 2) with operand 1 under mask
;; operand 3, which must satisfy const_pow2_1_to_2_operand.  The output
;; code replaces operand 3 with exact_log2 of its value and emits
;; three-operand-plus-immediate vpinsrq.
6998 (define_insn "*avx_pinsrq"
6999 [(set (match_operand:V2DI 0 "register_operand" "=x")
7000 (vec_merge:V2DI
7001 (vec_duplicate:V2DI
7002 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7003 (match_operand:V2DI 1 "register_operand" "x")
7004 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7005 "TARGET_AVX && TARGET_64BIT"
7006 {
7007 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7008 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7009 }
7010 [(set_attr "type" "sselog")
7011 (set_attr "prefix_extra" "1")
7012 (set_attr "length_immediate" "1")
7013 (set_attr "prefix" "vex")
7014 (set_attr "mode" "TI")])
7015
;; SSE4.1 insertion of a DI scalar into one element of a V2DI vector;
;; only enabled for TARGET_SSE4_1 && TARGET_64BIT.
;; RTL: vec_merge of (vec_duplicate operand 2) with operand 1 (tied to
;; the destination, "0") under mask operand 3, which must satisfy
;; const_pow2_1_to_2_operand.  The output code replaces operand 3 with
;; exact_log2 of its value.  Both prefix_rex and prefix_extra are "1".
7016 (define_insn "*sse4_1_pinsrq"
7017 [(set (match_operand:V2DI 0 "register_operand" "=x")
7018 (vec_merge:V2DI
7019 (vec_duplicate:V2DI
7020 (match_operand:DI 2 "nonimmediate_operand" "rm"))
7021 (match_operand:V2DI 1 "register_operand" "0")
7022 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
7023 "TARGET_SSE4_1 && TARGET_64BIT"
7024 {
7025 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
7026 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
7027 }
7028 [(set_attr "type" "sselog")
7029 (set_attr "prefix_rex" "1")
7030 (set_attr "prefix_extra" "1")
7031 (set_attr "length_immediate" "1")
7032 (set_attr "mode" "TI")])
7033
;; SSE4.1 extraction of one byte of a V16QI register, zero-extended into
;; a general register of an SWI48 mode.  Operand 2 is the element index
;; (const_0_to_15_operand).  The destination is printed with %k0.
;; NOTE(review): the leading %v in the template, together with
;; prefix=maybe_vex, presumably selects the VEX spelling when AVX is
;; enabled; confirm against the operand-printing code in i386.c.
7034 (define_insn "*sse4_1_pextrb_<mode>"
7035 [(set (match_operand:SWI48 0 "register_operand" "=r")
7036 (zero_extend:SWI48
7037 (vec_select:QI
7038 (match_operand:V16QI 1 "register_operand" "x")
7039 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7040 "TARGET_SSE4_1"
7041 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
7042 [(set_attr "type" "sselog")
7043 (set_attr "prefix_extra" "1")
7044 (set_attr "length_immediate" "1")
7045 (set_attr "prefix" "maybe_vex")
7046 (set_attr "mode" "TI")])
7047
;; SSE4.1 extraction of one byte of a V16QI register, stored directly to
;; a QI memory operand (no zero_extend).  Operand 2 is the element index
;; (const_0_to_15_operand).
7048 (define_insn "*sse4_1_pextrb_memory"
7049 [(set (match_operand:QI 0 "memory_operand" "=m")
7050 (vec_select:QI
7051 (match_operand:V16QI 1 "register_operand" "x")
7052 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7053 "TARGET_SSE4_1"
7054 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7055 [(set_attr "type" "sselog")
7056 (set_attr "prefix_extra" "1")
7057 (set_attr "length_immediate" "1")
7058 (set_attr "prefix" "maybe_vex")
7059 (set_attr "mode" "TI")])
7060
;; SSE2 extraction of one word of a V8HI register, zero-extended into a
;; general register of an SWI48 mode.  Operand 2 is the element index
;; (const_0_to_7_operand).  The destination is printed with %k0.
;; Uses prefix_data16 rather than prefix_extra.
7061 (define_insn "*sse2_pextrw_<mode>"
7062 [(set (match_operand:SWI48 0 "register_operand" "=r")
7063 (zero_extend:SWI48
7064 (vec_select:HI
7065 (match_operand:V8HI 1 "register_operand" "x")
7066 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7067 "TARGET_SSE2"
7068 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7069 [(set_attr "type" "sselog")
7070 (set_attr "prefix_data16" "1")
7071 (set_attr "length_immediate" "1")
7072 (set_attr "prefix" "maybe_vex")
7073 (set_attr "mode" "TI")])
7074
;; SSE4.1 extraction of one word of a V8HI register, stored directly to
;; an HI memory operand (no zero_extend).  Operand 2 is the element index
;; (const_0_to_7_operand).  Unlike the register form above, this needs
;; TARGET_SSE4_1 and sets prefix_extra.
7075 (define_insn "*sse4_1_pextrw_memory"
7076 [(set (match_operand:HI 0 "memory_operand" "=m")
7077 (vec_select:HI
7078 (match_operand:V8HI 1 "register_operand" "x")
7079 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7080 "TARGET_SSE4_1"
7081 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7082 [(set_attr "type" "sselog")
7083 (set_attr "prefix_extra" "1")
7084 (set_attr "length_immediate" "1")
7085 (set_attr "prefix" "maybe_vex")
7086 (set_attr "mode" "TI")])
7087
;; SSE4.1 extraction of one doubleword of a V4SI register into an SI
;; destination that may be a general register or memory ("=rm").
;; Operand 2 is the element index (const_0_to_3_operand).
7088 (define_insn "*sse4_1_pextrd"
7089 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7090 (vec_select:SI
7091 (match_operand:V4SI 1 "register_operand" "x")
7092 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7093 "TARGET_SSE4_1"
7094 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7095 [(set_attr "type" "sselog")
7096 (set_attr "prefix_extra" "1")
7097 (set_attr "length_immediate" "1")
7098 (set_attr "prefix" "maybe_vex")
7099 (set_attr "mode" "TI")])
7100
;; 64-bit-only variant of the pextrd pattern: the extracted SI element of
;; a V4SI register is zero-extended into a DI general register.  The
;; destination is printed with %k0.  Operand 2 is the element index
;; (const_0_to_3_operand).  Requires TARGET_64BIT && TARGET_SSE4_1.
7101 (define_insn "*sse4_1_pextrd_zext"
7102 [(set (match_operand:DI 0 "register_operand" "=r")
7103 (zero_extend:DI
7104 (vec_select:SI
7105 (match_operand:V4SI 1 "register_operand" "x")
7106 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7107 "TARGET_64BIT && TARGET_SSE4_1"
7108 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7109 [(set_attr "type" "sselog")
7110 (set_attr "prefix_extra" "1")
7111 (set_attr "length_immediate" "1")
7112 (set_attr "prefix" "maybe_vex")
7113 (set_attr "mode" "TI")])
7114
7115 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; SSE4.1 extraction of one quadword of a V2DI register into a DI
;; destination that may be a general register or memory ("=rm").
;; Operand 2 is the element index (const_0_to_1_operand).  Requires
;; TARGET_SSE4_1 && TARGET_64BIT; prefix_rex and prefix_extra are "1".
7116 (define_insn "*sse4_1_pextrq"
7117 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7118 (vec_select:DI
7119 (match_operand:V2DI 1 "register_operand" "x")
7120 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7121 "TARGET_SSE4_1 && TARGET_64BIT"
7122 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7123 [(set_attr "type" "sselog")
7124 (set_attr "prefix_rex" "1")
7125 (set_attr "prefix_extra" "1")
7126 (set_attr "length_immediate" "1")
7127 (set_attr "prefix" "maybe_vex")
7128 (set_attr "mode" "TI")])
7129
;; Expander taking a V4SI destination, a V4SI source and a constant
;; integer shuffle mask (operand 2); enabled for TARGET_SSE2.
;; The mask is split into four 2-bit fields, lowest bits first, and each
;; field is passed as a separate selector to sse2_pshufd_1.
(define_expand "sse2_pshufd"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V4SI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* sel[i] is bits 2*i .. 2*i+1 of the mask.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT ((imm >> (2 * i)) & 3);

  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
                                sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
7144
;; Insn form of the V4SI shuffle: a vec_select of operand 1 whose four
;; selectors are operands 2..5, each in the range 0..3
;; (const_0_to_3_operand).  The output code packs the selectors back into
;; one immediate (operand 2 in bits 0-1, operand 3 in bits 2-3, operand 4
;; in bits 4-5, operand 5 in bits 6-7), stores it in operands[2] and
;; prints it as the immediate of %vpshufd.
7145 (define_insn "sse2_pshufd_1"
7146 [(set (match_operand:V4SI 0 "register_operand" "=x")
7147 (vec_select:V4SI
7148 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7149 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7150 (match_operand 3 "const_0_to_3_operand" "")
7151 (match_operand 4 "const_0_to_3_operand" "")
7152 (match_operand 5 "const_0_to_3_operand" "")])))]
7153 "TARGET_SSE2"
7154 {
7155 int mask = 0;
7156 mask |= INTVAL (operands[2]) << 0;
7157 mask |= INTVAL (operands[3]) << 2;
7158 mask |= INTVAL (operands[4]) << 4;
7159 mask |= INTVAL (operands[5]) << 6;
7160 operands[2] = GEN_INT (mask);
7161
7162 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7163 }
7164 [(set_attr "type" "sselog1")
7165 (set_attr "prefix_data16" "1")
7166 (set_attr "prefix" "maybe_vex")
7167 (set_attr "length_immediate" "1")
7168 (set_attr "mode" "TI")])
7169
;; Expander taking a V8HI destination, a V8HI source and a constant
;; integer shuffle mask (operand 2); enabled for TARGET_SSE2.
;; The mask is split into four 2-bit fields, lowest bits first, and each
;; field is passed as a separate selector to sse2_pshuflw_1.
(define_expand "sse2_pshuflw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* sel[i] is bits 2*i .. 2*i+1 of the mask.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT ((imm >> (2 * i)) & 3);

  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
                                 sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
7184
;; Insn form of the low-word V8HI shuffle: a vec_select of operand 1
;; whose first four selectors are operands 2..5 (each 0..3) and whose
;; last four selectors are the fixed constants 4, 5, 6, 7, so elements
;; 4..7 keep their positions.  The output code packs operands 2..5 into
;; one immediate, two bits each starting at bit 0, stores it in
;; operands[2] and prints it as the immediate of %vpshuflw.
;; prefix_data16 is "0" and prefix_rep is "1".
7185 (define_insn "sse2_pshuflw_1"
7186 [(set (match_operand:V8HI 0 "register_operand" "=x")
7187 (vec_select:V8HI
7188 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7189 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7190 (match_operand 3 "const_0_to_3_operand" "")
7191 (match_operand 4 "const_0_to_3_operand" "")
7192 (match_operand 5 "const_0_to_3_operand" "")
7193 (const_int 4)
7194 (const_int 5)
7195 (const_int 6)
7196 (const_int 7)])))]
7197 "TARGET_SSE2"
7198 {
7199 int mask = 0;
7200 mask |= INTVAL (operands[2]) << 0;
7201 mask |= INTVAL (operands[3]) << 2;
7202 mask |= INTVAL (operands[4]) << 4;
7203 mask |= INTVAL (operands[5]) << 6;
7204 operands[2] = GEN_INT (mask);
7205
7206 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7207 }
7208 [(set_attr "type" "sselog")
7209 (set_attr "prefix_data16" "0")
7210 (set_attr "prefix_rep" "1")
7211 (set_attr "prefix" "maybe_vex")
7212 (set_attr "length_immediate" "1")
7213 (set_attr "mode" "TI")])
7214
;; Expander taking a V8HI destination, a V8HI source and a constant
;; integer shuffle mask (operand 2); enabled for TARGET_SSE2.
;; The mask is split into four 2-bit fields, lowest bits first; 4 is
;; added to each field so the selectors handed to sse2_pshufhw_1 lie in
;; the range 4..7.
(define_expand "sse2_pshufhw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* sel[i] is bits 2*i .. 2*i+1 of the mask, biased by 4.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT (((imm >> (2 * i)) & 3) + 4);

  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
                                 sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
7229
;; Insn form of the high-word V8HI shuffle: a vec_select of operand 1
;; whose first four selectors are the fixed constants 0, 1, 2, 3 (so
;; elements 0..3 keep their positions) and whose last four selectors are
;; operands 2..5, each in the range 4..7 (const_4_to_7_operand).  The
;; output code subtracts 4 from each of operands 2..5, packs the results
;; into one immediate, two bits each starting at bit 0, stores it in
;; operands[2] and prints it as the immediate of %vpshufhw.
;; prefix_rep is "1" and prefix_data16 is "0".
7230 (define_insn "sse2_pshufhw_1"
7231 [(set (match_operand:V8HI 0 "register_operand" "=x")
7232 (vec_select:V8HI
7233 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7234 (parallel [(const_int 0)
7235 (const_int 1)
7236 (const_int 2)
7237 (const_int 3)
7238 (match_operand 2 "const_4_to_7_operand" "")
7239 (match_operand 3 "const_4_to_7_operand" "")
7240 (match_operand 4 "const_4_to_7_operand" "")
7241 (match_operand 5 "const_4_to_7_operand" "")])))]
7242 "TARGET_SSE2"
7243 {
7244 int mask = 0;
7245 mask |= (INTVAL (operands[2]) - 4) << 0;
7246 mask |= (INTVAL (operands[3]) - 4) << 2;
7247 mask |= (INTVAL (operands[4]) - 4) << 4;
7248 mask |= (INTVAL (operands[5]) - 4) << 6;
7249 operands[2] = GEN_INT (mask);
7250
7251 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7252 }
7253 [(set_attr "type" "sselog")
7254 (set_attr "prefix_rep" "1")
7255 (set_attr "prefix_data16" "0")
7256 (set_attr "prefix" "maybe_vex")
7257 (set_attr "length_immediate" "1")
7258 (set_attr "mode" "TI")])
7259
;; Expander producing a vec_merge (mask 1) of the duplicated SImode
;; operand 1 with operands[2].  The preparation statement sets
;; operands[2] to the V4SI zero constant.
7260 (define_expand "sse2_loadd"
7261 [(set (match_operand:V4SI 0 "register_operand" "")
7262 (vec_merge:V4SI
7263 (vec_duplicate:V4SI
7264 (match_operand:SI 1 "nonimmediate_operand" ""))
7265 (match_dup 2)
7266 (const_int 1)))]
7267 "TARGET_SSE"
7268 "operands[2] = CONST0_RTX (V4SImode);")
7269
;; AVX form of the SImode-into-V4SI merge (vec_merge mask 1).
;; Operand 1 is a register or zero (reg_or_0_operand).  Alternatives 0
;; and 1 pair the "C" constraint on operand 1 with vmovd from memory or
;; from a general register; alternative 2 merges with a register
;; operand 1 using the three-operand vmovss.
7270 (define_insn "*avx_loadld"
7271 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7272 (vec_merge:V4SI
7273 (vec_duplicate:V4SI
7274 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7275 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7276 (const_int 1)))]
7277 "TARGET_AVX"
7278 "@
7279 vmovd\t{%2, %0|%0, %2}
7280 vmovd\t{%2, %0|%0, %2}
7281 vmovss\t{%2, %1, %0|%0, %1, %2}"
7282 [(set_attr "type" "ssemov")
7283 (set_attr "prefix" "vex")
7284 (set_attr "mode" "TI,TI,V4SF")])
7285
;; Non-AVX form of the SImode-into-V4SI merge (vec_merge mask 1).
;; Alternatives 0-2 pair the "C" constraint on operand 1 with movd
;; (from memory or a general register) or movss (from memory);
;; alternative 3 ties operand 1 to the destination and emits a
;; register-to-register movss.
7286 (define_insn "sse2_loadld"
7287 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7288 (vec_merge:V4SI
7289 (vec_duplicate:V4SI
7290 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7291 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7292 (const_int 1)))]
7293 "TARGET_SSE"
7294 "@
7295 movd\t{%2, %0|%0, %2}
7296 movd\t{%2, %0|%0, %2}
7297 movss\t{%2, %0|%0, %2}
7298 movss\t{%2, %0|%0, %2}"
7299 [(set_attr "type" "ssemov")
7300 (set_attr "mode" "TI,TI,V4SF,SF")])
7301
;; Extract element 0 of a V4SI register as SImode.  The insn itself is
;; always output as "#", i.e. it must be split.  The split fires after
;; reload when inter-unit moves are enabled, or the destination is
;; memory, or the destination is not a general register; it rewrites the
;; insn as a plain move whose source is operand 1's hard register
;; re-expressed in SImode.
7302 (define_insn_and_split "sse2_stored"
7303 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7304 (vec_select:SI
7305 (match_operand:V4SI 1 "register_operand" "x,Yi")
7306 (parallel [(const_int 0)])))]
7307 "TARGET_SSE"
7308 "#"
7309 "&& reload_completed
7310 && (TARGET_INTER_UNIT_MOVES
7311 || MEM_P (operands [0])
7312 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7313 [(set (match_dup 0) (match_dup 1))]
7314 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
7315
;; Extract element N (operand 2, 0..3) of a V4SI held in offsettable
;; memory into a general register.  Output as "#"; after reload it is
;; split into an SImode move from the same memory at byte offset N*4.
;; The insn condition is empty, so the pattern is not gated on any
;; target flag here.
7316 (define_insn_and_split "*vec_ext_v4si_mem"
7317 [(set (match_operand:SI 0 "register_operand" "=r")
7318 (vec_select:SI
7319 (match_operand:V4SI 1 "memory_operand" "o")
7320 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7321 ""
7322 "#"
7323 "reload_completed"
7324 [(const_int 0)]
7325 {
7326 int i = INTVAL (operands[2]);
7327
7328 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
7329 DONE;
7330 })
7331
;; Expander for extracting element 0 of a V2DI register into a DImode
;; nonimmediate destination.  It has no preparation code; the RTL
;; template is emitted as written.
7332 (define_expand "sse_storeq"
7333 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7334 (vec_select:DI
7335 (match_operand:V2DI 1 "register_operand" "")
7336 (parallel [(const_int 0)])))]
7337 "TARGET_SSE")
7338
;; 64-bit form of "extract element 0 of a V2DI as DImode".
;; Alternatives 0 and 1 (SSE register source) are output as "#" and so
;; must be split.  Alternative 2 loads the element from offsettable
;; memory straight into a general register.  That is an ordinary integer
;; move (type imov), so it must be emitted as plain mov{q}: with a "%v"
;; prefix the mnemonic would become "vmovq" when AVX is enabled, and
;; vmovq has no memory-to-general-register form.  Its prefix attribute
;; is "orig" for the same reason.
7339 (define_insn "*sse2_storeq_rex64"
7340 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7341 (vec_select:DI
7342 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7343 (parallel [(const_int 0)])))]
7344 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7345 "@
7346 #
7347 #
7348 mov{q}\t{%1, %0|%0, %1}"
7349 [(set_attr "type" "*,*,imov")
7350 (set_attr "prefix" "*,*,orig")
7351 (set_attr "mode" "*,*,DI")])
7352
;; Extract element 0 of a V2DI register into a DImode memory or SSE
;; register destination.  Always output as "#", so it has to be split
;; before final assembly output.
7353 (define_insn "*sse2_storeq"
7354 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7355 (vec_select:DI
7356 (match_operand:V2DI 1 "register_operand" "x")
7357 (parallel [(const_int 0)])))]
7358 "TARGET_SSE"
7359 "#")
7360
;; Splitter for "extract element 0 of a V2DI register as DImode".
;; After reload, when inter-unit moves are enabled, or the destination is
;; memory, or the destination is not a general register, the extraction
;; becomes a plain move whose source is operand 1's hard register
;; re-expressed in DImode.
7361 (define_split
7362 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7363 (vec_select:DI
7364 (match_operand:V2DI 1 "register_operand" "")
7365 (parallel [(const_int 0)])))]
7366 "TARGET_SSE
7367 && reload_completed
7368 && (TARGET_INTER_UNIT_MOVES
7369 || MEM_P (operands [0])
7370 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7371 [(set (match_dup 0) (match_dup 1))]
7372 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
7373
;; 64-bit AVX form of "extract element 1 of a V2DI as DImode".
;;   alt 0: SSE register to memory           -> vmovhps
;;   alt 1: SSE register to SSE register     -> vpsrldq by 8 bytes
;;   alt 2: offsettable memory to SSE reg    -> vmovq from %H1
;;   alt 3: offsettable memory to general reg-> mov{q} from %H1
;; Alternative 3 is an ordinary integer move (type imov).  It must not
;; carry a "v" prefix: vmovq has no memory-to-general-register form, so
;; the mnemonic is plain mov{q} and its prefix attribute is "orig", as in
;; the non-AVX pattern *vec_extractv2di_1_rex64.
7374 (define_insn "*vec_extractv2di_1_rex64_avx"
7375 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7376 (vec_select:DI
7377 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7378 (parallel [(const_int 1)])))]
7379 "TARGET_64BIT
7380 && TARGET_AVX
7381 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7382 "@
7383 vmovhps\t{%1, %0|%0, %1}
7384 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7385 vmovq\t{%H1, %0|%0, %H1}
7386 mov{q}\t{%H1, %0|%0, %H1}"
7387 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7388 (set_attr "length_immediate" "*,1,*,*")
7389 (set_attr "memory" "*,none,*,*")
7390 (set_attr "prefix" "vex,vex,vex,orig")
7391 (set_attr "mode" "V2SF,TI,TI,DI")])
7392
;; 64-bit non-AVX form of "extract element 1 of a V2DI as DImode".
;;   alt 0: SSE register to memory            -> movhps
;;   alt 1: in place (operand 1 tied to dest) -> psrldq by 8 bytes
;;   alt 2: offsettable memory to SSE reg     -> movq from %H1
;;   alt 3: offsettable memory to general reg -> mov{q} from %H1
;; The condition also rejects the memory-to-memory combination.
7393 (define_insn "*vec_extractv2di_1_rex64"
7394 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7395 (vec_select:DI
7396 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7397 (parallel [(const_int 1)])))]
7398 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7399 "@
7400 movhps\t{%1, %0|%0, %1}
7401 psrldq\t{$8, %0|%0, 8}
7402 movq\t{%H1, %0|%0, %H1}
7403 mov{q}\t{%H1, %0|%0, %H1}"
7404 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7405 (set_attr "length_immediate" "*,1,*,*")
7406 (set_attr "memory" "*,none,*,*")
7407 (set_attr "mode" "V2SF,TI,TI,DI")])
7408
;; 32-bit AVX form of "extract element 1 of a V2DI as DImode".
;;   alt 0: SSE register to memory        -> vmovhps
;;   alt 1: SSE register to SSE register  -> vpsrldq by 8 bytes
;;   alt 2: offsettable memory to SSE reg -> vmovq from %H1
;; The condition also rejects the memory-to-memory combination.
7409 (define_insn "*vec_extractv2di_1_avx"
7410 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7411 (vec_select:DI
7412 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7413 (parallel [(const_int 1)])))]
7414 "!TARGET_64BIT
7415 && TARGET_AVX
7416 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7417 "@
7418 vmovhps\t{%1, %0|%0, %1}
7419 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7420 vmovq\t{%H1, %0|%0, %H1}"
7421 [(set_attr "type" "ssemov,sseishft1,ssemov")
7422 (set_attr "length_immediate" "*,1,*")
7423 (set_attr "memory" "*,none,*")
7424 (set_attr "prefix" "vex")
7425 (set_attr "mode" "V2SF,TI,TI")])
7426
;; 32-bit SSE2 form of "extract element 1 of a V2DI as DImode".
;;   alt 0: SSE register to memory            -> movhps
;;   alt 1: in place (operand 1 tied to dest) -> psrldq by 8 bytes
;;   alt 2: offsettable memory to SSE reg     -> movq from %H1
;; The condition also rejects the memory-to-memory combination.
7427 (define_insn "*vec_extractv2di_1_sse2"
7428 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7429 (vec_select:DI
7430 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7431 (parallel [(const_int 1)])))]
7432 "!TARGET_64BIT
7433 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7434 "@
7435 movhps\t{%1, %0|%0, %1}
7436 psrldq\t{$8, %0|%0, 8}
7437 movq\t{%H1, %0|%0, %H1}"
7438 [(set_attr "type" "ssemov,sseishft1,ssemov")
7439 (set_attr "length_immediate" "*,1,*")
7440 (set_attr "memory" "*,none,*")
7441 (set_attr "mode" "V2SF,TI,TI")])
7442
7443 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE-without-SSE2 form of "extract element 1 of a V2DI as DImode",
;; using only single-precision move mnemonics:
;;   alt 0: SSE register to memory        -> movhps
;;   alt 1: SSE register to SSE register  -> movhlps
;;   alt 2: offsettable memory to SSE reg -> movlps from %H1
7444 (define_insn "*vec_extractv2di_1_sse"
7445 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7446 (vec_select:DI
7447 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7448 (parallel [(const_int 1)])))]
7449 "!TARGET_SSE2 && TARGET_SSE
7450 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7451 "@
7452 movhps\t{%1, %0|%0, %1}
7453 movhlps\t{%1, %0|%0, %1}
7454 movlps\t{%H1, %0|%0, %H1}"
7455 [(set_attr "type" "ssemov")
7456 (set_attr "mode" "V2SF,V4SF,V2SF")])
7457
;; AVX broadcast of an SImode value to all four V4SI elements:
;;   alt 0: source in an SSE register -> vpshufd with immediate 0
;;   alt 1: source in memory          -> vbroadcastss
;; NOTE(review): operand 1 uses the register_operand predicate although
;; alternative 1 carries an "m" constraint, whereas *vec_dupv2di_avx uses
;; nonimmediate_operand for the same register/memory pair.  Confirm
;; whether the narrower predicate is intentional.
7458 (define_insn "*vec_dupv4si_avx"
7459 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7460 (vec_duplicate:V4SI
7461 (match_operand:SI 1 "register_operand" "x,m")))]
7462 "TARGET_AVX"
7463 "@
7464 vpshufd\t{$0, %1, %0|%0, %1, 0}
7465 vbroadcastss\t{%1, %0|%0, %1}"
7466 [(set_attr "type" "sselog1,ssemov")
7467 (set_attr "length_immediate" "1,0")
7468 (set_attr "prefix_extra" "0,1")
7469 (set_attr "prefix" "vex")
7470 (set_attr "mode" "TI,V4SF")])
7471
;; Broadcast of an SImode register to all four V4SI elements:
;;   alt 0 (Y2 registers)            -> pshufd with immediate 0
;;   alt 1 (operand 1 tied to dest)  -> shufps of the register with
;;                                      itself, immediate 0
7472 (define_insn "*vec_dupv4si"
7473 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7474 (vec_duplicate:V4SI
7475 (match_operand:SI 1 "register_operand" " Y2,0")))]
7476 "TARGET_SSE"
7477 "@
7478 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7479 shufps\t{$0, %0, %0|%0, %0, 0}"
7480 [(set_attr "type" "sselog1")
7481 (set_attr "length_immediate" "1")
7482 (set_attr "mode" "TI,V4SF")])
7483
;; AVX broadcast of a DImode value to both V2DI elements:
;;   alt 0: source in an SSE register -> vpunpcklqdq of it with itself
;;   alt 1: source in memory          -> vmovddup
7484 (define_insn "*vec_dupv2di_avx"
7485 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7486 (vec_duplicate:V2DI
7487 (match_operand:DI 1 "nonimmediate_operand" " x,m")))]
7488 "TARGET_AVX"
7489 "@
7490 vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}
7491 vmovddup\t{%1, %0|%0, %1}"
7492 [(set_attr "type" "sselog1")
7493 (set_attr "prefix" "vex")
7494 (set_attr "mode" "TI,DF")])
7495
;; SSE3 broadcast of a DImode value to both V2DI elements:
;;   alt 0: operand 1 tied to the destination -> punpcklqdq in place
;;   alt 1: source in memory                  -> movddup
7496 (define_insn "*vec_dupv2di_sse3"
7497 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7498 (vec_duplicate:V2DI
7499 (match_operand:DI 1 "nonimmediate_operand" " 0,m")))]
7500 "TARGET_SSE3"
7501 "@
7502 punpcklqdq\t%0, %0
7503 movddup\t{%1, %0|%0, %1}"
7504 [(set_attr "type" "sselog1")
7505 (set_attr "mode" "TI,DF")])
7506
;; Baseline broadcast of a DImode register to both V2DI elements.  In
;; both alternatives operand 1 is tied to the destination:
;;   alt 0 (Y2 destination) -> punpcklqdq in place
;;   alt 1 (x destination)  -> movlhps in place
7507 (define_insn "*vec_dupv2di"
7508 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7509 (vec_duplicate:V2DI
7510 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7511 "TARGET_SSE"
7512 "@
7513 punpcklqdq\t%0, %0
7514 movlhps\t%0, %0"
7515 [(set_attr "type" "sselog1,ssemov")
7516 (set_attr "mode" "TI,V4SF")])
7517
;; AVX concatenation of two SImode values into a V2SI.
;;   alt 0: vpinsrd of operand 2 (register or memory) at index 1
;;   alt 1: vpunpckldq of two SSE registers
;;   alt 2: vmovd of operand 1, paired with the "C" constraint on operand 2
;;   alt 3: punpckldq on "y"-class registers, operand 1 tied to dest
;;   alt 4: movd into a "y"-class register, "C" constraint on operand 2
;; The prefix attribute is "orig" for alternatives 3 and 4 and "vex" for
;; the others.
7518 (define_insn "*vec_concatv2si_avx"
7519 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7520 (vec_concat:V2SI
7521 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7522 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7523 "TARGET_AVX"
7524 "@
7525 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7526 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7527 vmovd\t{%1, %0|%0, %1}
7528 punpckldq\t{%2, %0|%0, %2}
7529 movd\t{%1, %0|%0, %1}"
7530 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7531 (set_attr "prefix_extra" "1,*,*,*,*")
7532 (set_attr "length_immediate" "1,*,*,*,*")
7533 (set (attr "prefix")
7534 (if_then_else (eq_attr "alternative" "3,4")
7535 (const_string "orig")
7536 (const_string "vex")))
7537 (set_attr "mode" "TI,TI,TI,DI,DI")])
7538
;; SSE4.1 concatenation of two SImode values into a V2SI.
;;   alt 0: pinsrd of operand 2 at index 1, operand 1 tied to dest
;;   alt 1: punpckldq with an SSE register, operand 1 tied to dest
;;   alt 2: movd of operand 1, paired with the "C" constraint on operand 2
;;   alt 3: punpckldq on "y"-class registers, operand 1 tied to dest
;;   alt 4: movd into a "y"-class register, "C" constraint on operand 2
7539 (define_insn "*vec_concatv2si_sse4_1"
7540 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7541 (vec_concat:V2SI
7542 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7543 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7544 "TARGET_SSE4_1"
7545 "@
7546 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7547 punpckldq\t{%2, %0|%0, %2}
7548 movd\t{%1, %0|%0, %1}
7549 punpckldq\t{%2, %0|%0, %2}
7550 movd\t{%1, %0|%0, %1}"
7551 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7552 (set_attr "prefix_extra" "1,*,*,*,*")
7553 (set_attr "length_immediate" "1,*,*,*,*")
7554 (set_attr "mode" "TI,TI,TI,DI,DI")])
7555
7556 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7557 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7558 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; SSE2 concatenation of two SImode values into a V2SI.  Operand 2 is
;; restricted to a register or zero (reg_or_0_operand).
;;   alt 0: punpckldq with an SSE register, operand 1 tied to dest
;;   alt 1: movd of operand 1, paired with the "C" constraint on operand 2
;;   alt 2: punpckldq on "y"-class registers, operand 1 tied to dest
;;   alt 3: movd into a "y"-class register, "C" constraint on operand 2
7559 (define_insn "*vec_concatv2si_sse2"
7560 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7561 (vec_concat:V2SI
7562 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7563 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7564 "TARGET_SSE2"
7565 "@
7566 punpckldq\t{%2, %0|%0, %2}
7567 movd\t{%1, %0|%0, %1}
7568 punpckldq\t{%2, %0|%0, %2}
7569 movd\t{%1, %0|%0, %1}"
7570 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7571 (set_attr "mode" "TI,TI,DI,DI")])
7572
;; Baseline-SSE concatenation of two SImode values into a V2SI.
;;   alt 0: unpcklps with an SSE register, operand 1 tied to dest
;;   alt 1: movss of a memory operand 1, "C" constraint on operand 2
;;   alt 2: punpckldq on "y"-class registers, operand 1 tied to dest
;;   alt 3: movd into a "y"-class register, "C" constraint on operand 2
7573 (define_insn "*vec_concatv2si_sse"
7574 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7575 (vec_concat:V2SI
7576 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7577 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7578 "TARGET_SSE"
7579 "@
7580 unpcklps\t{%2, %0|%0, %2}
7581 movss\t{%1, %0|%0, %1}
7582 punpckldq\t{%2, %0|%0, %2}
7583 movd\t{%1, %0|%0, %1}"
7584 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7585 (set_attr "mode" "V4SF,V4SF,DI,DI")])
7586
;; AVX concatenation of two V2SI values into a V4SI:
;;   alt 0: operand 2 in an SSE register -> vpunpcklqdq
;;   alt 1: operand 2 in memory          -> vmovhps
7587 (define_insn "*vec_concatv4si_1_avx"
7588 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7589 (vec_concat:V4SI
7590 (match_operand:V2SI 1 "register_operand" " x,x")
7591 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7592 "TARGET_AVX"
7593 "@
7594 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7595 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7596 [(set_attr "type" "sselog,ssemov")
7597 (set_attr "prefix" "vex")
7598 (set_attr "mode" "TI,V2SF")])
7599
;; Non-AVX concatenation of two V2SI values into a V4SI.  Operand 1 is
;; tied to the destination in every alternative:
;;   alt 0: operand 2 in a Y2 register -> punpcklqdq
;;   alt 1: operand 2 in an x register -> movlhps
;;   alt 2: operand 2 in memory        -> movhps
7600 (define_insn "*vec_concatv4si_1"
7601 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7602 (vec_concat:V4SI
7603 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7604 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7605 "TARGET_SSE"
7606 "@
7607 punpcklqdq\t{%2, %0|%0, %2}
7608 movlhps\t{%2, %0|%0, %2}
7609 movhps\t{%2, %0|%0, %2}"
7610 [(set_attr "type" "sselog,ssemov,ssemov")
7611 (set_attr "mode" "TI,V4SF,V2SF")])
7612
;; 32-bit AVX concatenation of two DImode values into a V2DI.
;;   alt 0: vmovq of a memory operand 1, "C" constraint on operand 2
;;   alt 1: movq2dq from a "y"-class register, "C" constraint on operand 2
;;   alt 2: vpunpcklqdq of two SSE registers
;;   alt 3: vmovhps with operand 2 in memory
;; The prefix attribute is "orig" for alternative 1 and "vex" otherwise.
7613 (define_insn "*vec_concatv2di_avx"
7614 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7615 (vec_concat:V2DI
7616 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7617 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7618 "!TARGET_64BIT && TARGET_AVX"
7619 "@
7620 vmovq\t{%1, %0|%0, %1}
7621 movq2dq\t{%1, %0|%0, %1}
7622 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7623 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7624 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7625 (set (attr "prefix")
7626 (if_then_else (eq_attr "alternative" "1")
7627 (const_string "orig")
7628 (const_string "vex")))
7629 (set_attr "mode" "TI,TI,TI,V2SF")])
7630
;; 32-bit non-AVX concatenation of two DImode values into a V2DI.
;;   alt 0: movq of operand 1 (memory or Y2 reg), "C" on operand 2
;;   alt 1: movq2dq from a "y"-class register, "C" on operand 2
;;   alt 2: punpcklqdq, operand 1 tied to dest, operand 2 in a Y2 reg
;;   alt 3: movlhps, operand 1 tied to dest, operand 2 in an x reg
;;   alt 4: movhps, operand 1 tied to dest, operand 2 in memory
7631 (define_insn "vec_concatv2di"
7632 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7633 (vec_concat:V2DI
7634 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7635 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7636 "!TARGET_64BIT && TARGET_SSE"
7637 "@
7638 movq\t{%1, %0|%0, %1}
7639 movq2dq\t{%1, %0|%0, %1}
7640 punpcklqdq\t{%2, %0|%0, %2}
7641 movlhps\t{%2, %0|%0, %2}
7642 movhps\t{%2, %0|%0, %2}"
7643 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7644 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
7645
;; 64-bit AVX concatenation of two DImode values into a V2DI.
;;   alt 0: vpinsrq of operand 2 (general register or memory) at index 1
;;   alt 1: vmovq of a memory operand 1, "C" on operand 2
;;   alt 2: vmovq of a general-register operand 1, "C" on operand 2
;;   alt 3: movq2dq from a "y"-class register, "C" on operand 2
;;   alt 4: vpunpcklqdq of two SSE registers
;;   alt 5: vmovhps with operand 2 in memory
;; The prefix attribute is "orig" for alternative 3 and "vex" otherwise.
7646 (define_insn "*vec_concatv2di_rex64_avx"
7647 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7648 (vec_concat:V2DI
7649 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7650 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7651 "TARGET_64BIT && TARGET_AVX"
7652 "@
7653 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7654 vmovq\t{%1, %0|%0, %1}
7655 vmovq\t{%1, %0|%0, %1}
7656 movq2dq\t{%1, %0|%0, %1}
7657 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7658 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7659 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7660 (set_attr "prefix_extra" "1,*,*,*,*,*")
7661 (set_attr "length_immediate" "1,*,*,*,*,*")
7662 (set (attr "prefix")
7663 (if_then_else (eq_attr "alternative" "3")
7664 (const_string "orig")
7665 (const_string "vex")))
7666 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
7667
;; 64-bit SSE4.1 concatenation of two DImode values into a V2DI.
;;   alt 0: pinsrq of operand 2 at index 1, operand 1 tied to dest
;;   alt 1: movq of operand 1 (memory or SSE reg), "C" on operand 2
;;   alt 2: movq of a general-register operand 1, "C" on operand 2
;;   alt 3: movq2dq from a "y"-class register, "C" on operand 2
;;   alt 4: punpcklqdq, operand 1 tied to dest
;;   alt 5: movlhps, operand 1 tied to dest
;;   alt 6: movhps, operand 1 tied to dest, operand 2 in memory
;; Alternatives 4 and 5 have identical constraints and differ only in the
;; mnemonic and in the type/mode attributes.
7668 (define_insn "*vec_concatv2di_rex64_sse4_1"
7669 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7670 (vec_concat:V2DI
7671 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7672 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7673 "TARGET_64BIT && TARGET_SSE4_1"
7674 "@
7675 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7676 movq\t{%1, %0|%0, %1}
7677 movq\t{%1, %0|%0, %1}
7678 movq2dq\t{%1, %0|%0, %1}
7679 punpcklqdq\t{%2, %0|%0, %2}
7680 movlhps\t{%2, %0|%0, %2}
7681 movhps\t{%2, %0|%0, %2}"
7682 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7683 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7684 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7685 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7686 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
7687
;; 64-bit baseline concatenation of two DImode values into a V2DI.
;;   alt 0: movq of operand 1 (memory or Y2 reg), "C" on operand 2
;;   alt 1: movq of a general-register operand 1, "C" on operand 2
;;   alt 2: movq2dq from a "y"-class register, "C" on operand 2
;;   alt 3: punpcklqdq, operand 1 tied to dest, operand 2 in a Y2 reg
;;   alt 4: movlhps, operand 1 tied to dest, operand 2 in an x reg
;;   alt 5: movhps, operand 1 tied to dest, operand 2 in memory
7688 (define_insn "*vec_concatv2di_rex64_sse"
7689 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7690 (vec_concat:V2DI
7691 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7692 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7693 "TARGET_64BIT && TARGET_SSE"
7694 "@
7695 movq\t{%1, %0|%0, %1}
7696 movq\t{%1, %0|%0, %1}
7697 movq2dq\t{%1, %0|%0, %1}
7698 punpcklqdq\t{%2, %0|%0, %2}
7699 movlhps\t{%2, %0|%0, %2}
7700 movhps\t{%2, %0|%0, %2}"
7701 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7702 (set_attr "prefix_rex" "*,1,*,*,*,*")
7703 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
7704
;; Expander from a V16QI source register to a V8HI destination register.
;; It delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and
;; to ix86_expand_sse_unpack otherwise, passing (operands, true, true).
;; Judging by the sibling expanders' names, the two flags presumably mean
;; "unsigned" and "high half" -- confirm against the helper definitions.
7705 (define_expand "vec_unpacku_hi_v16qi"
7706 [(match_operand:V8HI 0 "register_operand" "")
7707 (match_operand:V16QI 1 "register_operand" "")]
7708 "TARGET_SSE2"
7709 {
7710 if (TARGET_SSE4_1)
7711 ix86_expand_sse4_unpack (operands, true, true);
7712 else
7713 ix86_expand_sse_unpack (operands, true, true);
7714 DONE;
7715 })
7716
;; Expander from a V16QI source register to a V8HI destination register.
;; It delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and
;; to ix86_expand_sse_unpack otherwise, passing (operands, false, true).
;; Judging by the sibling expanders' names, the two flags presumably mean
;; "unsigned" and "high half" -- confirm against the helper definitions.
7717 (define_expand "vec_unpacks_hi_v16qi"
7718 [(match_operand:V8HI 0 "register_operand" "")
7719 (match_operand:V16QI 1 "register_operand" "")]
7720 "TARGET_SSE2"
7721 {
7722 if (TARGET_SSE4_1)
7723 ix86_expand_sse4_unpack (operands, false, true);
7724 else
7725 ix86_expand_sse_unpack (operands, false, true);
7726 DONE;
7727 })
7728
;; Expander from a V16QI source register to a V8HI destination register.
;; It delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and
;; to ix86_expand_sse_unpack otherwise, passing (operands, true, false).
;; Judging by the sibling expanders' names, the two flags presumably mean
;; "unsigned" and "high half" -- confirm against the helper definitions.
7729 (define_expand "vec_unpacku_lo_v16qi"
7730 [(match_operand:V8HI 0 "register_operand" "")
7731 (match_operand:V16QI 1 "register_operand" "")]
7732 "TARGET_SSE2"
7733 {
7734 if (TARGET_SSE4_1)
7735 ix86_expand_sse4_unpack (operands, true, false);
7736 else
7737 ix86_expand_sse_unpack (operands, true, false);
7738 DONE;
7739 })
7740
;; Expander from a V16QI source register to a V8HI destination register.
;; It delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and
;; to ix86_expand_sse_unpack otherwise, passing (operands, false, false).
;; Judging by the sibling expanders' names, the two flags presumably mean
;; "unsigned" and "high half" -- confirm against the helper definitions.
7741 (define_expand "vec_unpacks_lo_v16qi"
7742 [(match_operand:V8HI 0 "register_operand" "")
7743 (match_operand:V16QI 1 "register_operand" "")]
7744 "TARGET_SSE2"
7745 {
7746 if (TARGET_SSE4_1)
7747 ix86_expand_sse4_unpack (operands, false, false);
7748 else
7749 ix86_expand_sse_unpack (operands, false, false);
7750 DONE;
7751 })
7752
;; Expander from a V8HI source register to a V4SI destination register.
;; It delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and
;; to ix86_expand_sse_unpack otherwise, passing (operands, true, true).
;; Judging by the sibling expanders' names, the two flags presumably mean
;; "unsigned" and "high half" -- confirm against the helper definitions.
7753 (define_expand "vec_unpacku_hi_v8hi"
7754 [(match_operand:V4SI 0 "register_operand" "")
7755 (match_operand:V8HI 1 "register_operand" "")]
7756 "TARGET_SSE2"
7757 {
7758 if (TARGET_SSE4_1)
7759 ix86_expand_sse4_unpack (operands, true, true);
7760 else
7761 ix86_expand_sse_unpack (operands, true, true);
7762 DONE;
7763 })
7764
;; Expander from a V8HI source register to a V4SI destination register.
;; It delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and
;; to ix86_expand_sse_unpack otherwise, passing (operands, false, true).
;; Judging by the sibling expanders' names, the two flags presumably mean
;; "unsigned" and "high half" -- confirm against the helper definitions.
7765 (define_expand "vec_unpacks_hi_v8hi"
7766 [(match_operand:V4SI 0 "register_operand" "")
7767 (match_operand:V8HI 1 "register_operand" "")]
7768 "TARGET_SSE2"
7769 {
7770 if (TARGET_SSE4_1)
7771 ix86_expand_sse4_unpack (operands, false, true);
7772 else
7773 ix86_expand_sse_unpack (operands, false, true);
7774 DONE;
7775 })
7776
;; Expander from a V8HI source register to a V4SI destination register.
;; It delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and
;; to ix86_expand_sse_unpack otherwise, passing (operands, true, false).
;; Judging by the sibling expanders' names, the two flags presumably mean
;; "unsigned" and "high half" -- confirm against the helper definitions.
7777 (define_expand "vec_unpacku_lo_v8hi"
7778 [(match_operand:V4SI 0 "register_operand" "")
7779 (match_operand:V8HI 1 "register_operand" "")]
7780 "TARGET_SSE2"
7781 {
7782 if (TARGET_SSE4_1)
7783 ix86_expand_sse4_unpack (operands, true, false);
7784 else
7785 ix86_expand_sse_unpack (operands, true, false);
7786 DONE;
7787 })
7788
;; Expander from a V8HI source register to a V4SI destination register.
;; It delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and
;; to ix86_expand_sse_unpack otherwise, passing (operands, false, false).
;; Judging by the sibling expanders' names, the two flags presumably mean
;; "unsigned" and "high half" -- confirm against the helper definitions.
7789 (define_expand "vec_unpacks_lo_v8hi"
7790 [(match_operand:V4SI 0 "register_operand" "")
7791 (match_operand:V8HI 1 "register_operand" "")]
7792 "TARGET_SSE2"
7793 {
7794 if (TARGET_SSE4_1)
7795 ix86_expand_sse4_unpack (operands, false, false);
7796 else
7797 ix86_expand_sse_unpack (operands, false, false);
7798 DONE;
7799 })
7800
;; Expander from a V4SI source register to a V2DI destination register.
;; It delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and
;; to ix86_expand_sse_unpack otherwise, passing (operands, true, true).
;; Judging by the sibling expanders' names, the two flags presumably mean
;; "unsigned" and "high half" -- confirm against the helper definitions.
7801 (define_expand "vec_unpacku_hi_v4si"
7802 [(match_operand:V2DI 0 "register_operand" "")
7803 (match_operand:V4SI 1 "register_operand" "")]
7804 "TARGET_SSE2"
7805 {
7806 if (TARGET_SSE4_1)
7807 ix86_expand_sse4_unpack (operands, true, true);
7808 else
7809 ix86_expand_sse_unpack (operands, true, true);
7810 DONE;
7811 })
7812
;; Expander from a V4SI source register to a V2DI destination register.
;; It delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and
;; to ix86_expand_sse_unpack otherwise, passing (operands, false, true).
;; Judging by the sibling expanders' names, the two flags presumably mean
;; "unsigned" and "high half" -- confirm against the helper definitions.
7813 (define_expand "vec_unpacks_hi_v4si"
7814 [(match_operand:V2DI 0 "register_operand" "")
7815 (match_operand:V4SI 1 "register_operand" "")]
7816 "TARGET_SSE2"
7817 {
7818 if (TARGET_SSE4_1)
7819 ix86_expand_sse4_unpack (operands, false, true);
7820 else
7821 ix86_expand_sse_unpack (operands, false, true);
7822 DONE;
7823 })
7824
;; Expander from a V4SI source register to a V2DI destination register.
;; It delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and
;; to ix86_expand_sse_unpack otherwise, passing (operands, true, false).
;; Judging by the sibling expanders' names, the two flags presumably mean
;; "unsigned" and "high half" -- confirm against the helper definitions.
7825 (define_expand "vec_unpacku_lo_v4si"
7826 [(match_operand:V2DI 0 "register_operand" "")
7827 (match_operand:V4SI 1 "register_operand" "")]
7828 "TARGET_SSE2"
7829 {
7830 if (TARGET_SSE4_1)
7831 ix86_expand_sse4_unpack (operands, true, false);
7832 else
7833 ix86_expand_sse_unpack (operands, true, false);
7834 DONE;
7835 })
7836
;; Expander from a V4SI source register to a V2DI destination register.
;; It delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and
;; to ix86_expand_sse_unpack otherwise, passing (operands, false, false).
;; Judging by the sibling expanders' names, the two flags presumably mean
;; "unsigned" and "high half" -- confirm against the helper definitions.
7837 (define_expand "vec_unpacks_lo_v4si"
7838 [(match_operand:V2DI 0 "register_operand" "")
7839 (match_operand:V4SI 1 "register_operand" "")]
7840 "TARGET_SSE2"
7841 {
7842 if (TARGET_SSE4_1)
7843 ix86_expand_sse4_unpack (operands, false, false);
7844 else
7845 ix86_expand_sse_unpack (operands, false, false);
7846 DONE;
7847 })
7848
7849 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7850 ;;
7851 ;; Miscellaneous
7852 ;;
7853 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7854
;; Expander for the V16QI average written out in RTL: zero-extend both
;; operands to V16HI, add them, add a vector of sixteen ones, shift right
;; logically by 1 and truncate back to V16QI.  The preparation statement
;; calls ix86_fixup_binary_operands_no_copy for PLUS in V16QImode.
;; Note that the vector of ones is written with mode V16QI although it is
;; an operand of a V16HI plus; *avx_uavgv16qi3 and *sse2_uavgv16qi3 spell
;; it the same way, so the three patterns have to be kept in step.
7855 (define_expand "sse2_uavgv16qi3"
7856 [(set (match_operand:V16QI 0 "register_operand" "")
7857 (truncate:V16QI
7858 (lshiftrt:V16HI
7859 (plus:V16HI
7860 (plus:V16HI
7861 (zero_extend:V16HI
7862 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7863 (zero_extend:V16HI
7864 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7865 (const_vector:V16QI [(const_int 1) (const_int 1)
7866 (const_int 1) (const_int 1)
7867 (const_int 1) (const_int 1)
7868 (const_int 1) (const_int 1)
7869 (const_int 1) (const_int 1)
7870 (const_int 1) (const_int 1)
7871 (const_int 1) (const_int 1)
7872 (const_int 1) (const_int 1)]))
7873 (const_int 1))))]
7874 "TARGET_SSE2"
7875 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
7876
;; AVX insn matching the V16QI average RTL (zero-extend, add, add ones,
;; shift right by 1, truncate).  Operand 1 is marked commutative ("%")
;; and the pattern is emitted as the three-operand vpavgb.  The condition
;; additionally requires ix86_binary_operator_ok for PLUS in V16QImode.
7877 (define_insn "*avx_uavgv16qi3"
7878 [(set (match_operand:V16QI 0 "register_operand" "=x")
7879 (truncate:V16QI
7880 (lshiftrt:V16HI
7881 (plus:V16HI
7882 (plus:V16HI
7883 (zero_extend:V16HI
7884 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7885 (zero_extend:V16HI
7886 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7887 (const_vector:V16QI [(const_int 1) (const_int 1)
7888 (const_int 1) (const_int 1)
7889 (const_int 1) (const_int 1)
7890 (const_int 1) (const_int 1)
7891 (const_int 1) (const_int 1)
7892 (const_int 1) (const_int 1)
7893 (const_int 1) (const_int 1)
7894 (const_int 1) (const_int 1)]))
7895 (const_int 1))))]
7896 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7897 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7898 [(set_attr "type" "sseiadd")
7899 (set_attr "prefix" "vex")
7900 (set_attr "mode" "TI")])
7901
;; SSE2 insn matching the V16QI average RTL (zero-extend, add, add ones,
;; shift right by 1, truncate).  Operand 1 is commutative and tied to the
;; destination ("%0"); the pattern is emitted as the two-operand pavgb.
;; The condition additionally requires ix86_binary_operator_ok for PLUS
;; in V16QImode.
7902 (define_insn "*sse2_uavgv16qi3"
7903 [(set (match_operand:V16QI 0 "register_operand" "=x")
7904 (truncate:V16QI
7905 (lshiftrt:V16HI
7906 (plus:V16HI
7907 (plus:V16HI
7908 (zero_extend:V16HI
7909 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
7910 (zero_extend:V16HI
7911 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7912 (const_vector:V16QI [(const_int 1) (const_int 1)
7913 (const_int 1) (const_int 1)
7914 (const_int 1) (const_int 1)
7915 (const_int 1) (const_int 1)
7916 (const_int 1) (const_int 1)
7917 (const_int 1) (const_int 1)
7918 (const_int 1) (const_int 1)
7919 (const_int 1) (const_int 1)]))
7920 (const_int 1))))]
7921 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7922 "pavgb\t{%2, %0|%0, %2}"
7923 [(set_attr "type" "sseiadd")
7924 (set_attr "prefix_data16" "1")
7925 (set_attr "mode" "TI")])
7926
;; Expander for the V8HI average written out in RTL: zero-extend both
;; operands to V8SI, add them, add a vector of eight ones, shift right
;; logically by 1 and truncate back to V8HI.  The preparation statement
;; calls ix86_fixup_binary_operands_no_copy for PLUS in V8HImode.
;; Note that the vector of ones is written with mode V8HI although it is
;; an operand of a V8SI plus; *avx_uavgv8hi3 and *sse2_uavgv8hi3 spell it
;; the same way, so the three patterns have to be kept in step.
7927 (define_expand "sse2_uavgv8hi3"
7928 [(set (match_operand:V8HI 0 "register_operand" "")
7929 (truncate:V8HI
7930 (lshiftrt:V8SI
7931 (plus:V8SI
7932 (plus:V8SI
7933 (zero_extend:V8SI
7934 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7935 (zero_extend:V8SI
7936 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7937 (const_vector:V8HI [(const_int 1) (const_int 1)
7938 (const_int 1) (const_int 1)
7939 (const_int 1) (const_int 1)
7940 (const_int 1) (const_int 1)]))
7941 (const_int 1))))]
7942 "TARGET_SSE2"
7943 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
7944
;; AVX insn matching the V8HI average RTL (zero-extend, add, add ones,
;; shift right by 1, truncate).  Operand 1 is marked commutative ("%")
;; and the pattern is emitted as the three-operand vpavgw.  The condition
;; additionally requires ix86_binary_operator_ok for PLUS in V8HImode.
7945 (define_insn "*avx_uavgv8hi3"
7946 [(set (match_operand:V8HI 0 "register_operand" "=x")
7947 (truncate:V8HI
7948 (lshiftrt:V8SI
7949 (plus:V8SI
7950 (plus:V8SI
7951 (zero_extend:V8SI
7952 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
7953 (zero_extend:V8SI
7954 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7955 (const_vector:V8HI [(const_int 1) (const_int 1)
7956 (const_int 1) (const_int 1)
7957 (const_int 1) (const_int 1)
7958 (const_int 1) (const_int 1)]))
7959 (const_int 1))))]
7960 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7961 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7962 [(set_attr "type" "sseiadd")
7963 (set_attr "prefix" "vex")
7964 (set_attr "mode" "TI")])
7965
;; SSE2 insn matching the V8HI average RTL (zero-extend, add, add ones,
;; shift right by 1, truncate).  Operand 1 is commutative and tied to the
;; destination ("%0"); the pattern is emitted as the two-operand pavgw.
;; The condition additionally requires ix86_binary_operator_ok for PLUS
;; in V8HImode.
7966 (define_insn "*sse2_uavgv8hi3"
7967 [(set (match_operand:V8HI 0 "register_operand" "=x")
7968 (truncate:V8HI
7969 (lshiftrt:V8SI
7970 (plus:V8SI
7971 (plus:V8SI
7972 (zero_extend:V8SI
7973 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
7974 (zero_extend:V8SI
7975 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
7976 (const_vector:V8HI [(const_int 1) (const_int 1)
7977 (const_int 1) (const_int 1)
7978 (const_int 1) (const_int 1)
7979 (const_int 1) (const_int 1)]))
7980 (const_int 1))))]
7981 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7982 "pavgw\t{%2, %0|%0, %2}"
7983 [(set_attr "type" "sseiadd")
7984 (set_attr "prefix_data16" "1")
7985 (set_attr "mode" "TI")])
7986
7987 ;; The correct representation for this is absolutely enormous, and
7988 ;; surely not generally useful.
;; AVX form: modelled as an opaque UNSPEC_PSADBW of two V16QI inputs
;; producing a V2DI result, emitted as the three-operand vpsadbw.
7989 (define_insn "*avx_psadbw"
7990 [(set (match_operand:V2DI 0 "register_operand" "=x")
7991 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
7992 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7993 UNSPEC_PSADBW))]
7994 "TARGET_AVX"
7995 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7996 [(set_attr "type" "sseiadd")
7997 (set_attr "prefix" "vex")
7998 (set_attr "mode" "TI")])
7999
;; SSE2 form: the same UNSPEC_PSADBW of two V16QI inputs producing V2DI,
;; with operand 1 tied to the destination and emitted as the two-operand
;; psadbw.
8000 (define_insn "sse2_psadbw"
8001 [(set (match_operand:V2DI 0 "register_operand" "=x")
8002 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
8003 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8004 UNSPEC_PSADBW))]
8005 "TARGET_SSE2"
8006 "psadbw\t{%2, %0|%0, %2}"
8007 [(set_attr "type" "sseiadd")
8008 (set_attr "atom_unit" "simul")
8009 (set_attr "prefix_data16" "1")
8010 (set_attr "mode" "TI")])
8011
;; UNSPEC_MOVMSK of a register in one of the AVX256MODEF2P modes,
;; yielding an SImode general register; emitted as
;; vmovmsk<ssemodesuffix>.  Enabled when AVX256_VEC_FLOAT_MODE_P holds
;; for the mode.
8012 (define_insn "avx_movmsk<ssemodesuffix>256"
8013 [(set (match_operand:SI 0 "register_operand" "=r")
8014 (unspec:SI
8015 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
8016 UNSPEC_MOVMSK))]
8017 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
8018 "vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8019 [(set_attr "type" "ssecvt")
8020 (set_attr "prefix" "vex")
8021 (set_attr "mode" "<MODE>")])
8022
;; UNSPEC_MOVMSK of a register in one of the SSEMODEF2P modes, yielding
;; an SImode general register; emitted as movmsk<ssemodesuffix> with an
;; optional "v" prefix (%v, prefix attribute maybe_vex).  Enabled when
;; SSE_VEC_FLOAT_MODE_P holds for the mode.
8023 (define_insn "<sse>_movmsk<ssemodesuffix>"
8024 [(set (match_operand:SI 0 "register_operand" "=r")
8025 (unspec:SI
8026 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
8027 UNSPEC_MOVMSK))]
8028 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
8029 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8030 [(set_attr "type" "ssemov")
8031 (set_attr "prefix" "maybe_vex")
8032 (set_attr "mode" "<MODE>")])
8033
;; UNSPEC_MOVMSK of a V16QI register, yielding an SImode general
;; register; emitted as pmovmskb with an optional "v" prefix.
8034 (define_insn "sse2_pmovmskb"
8035 [(set (match_operand:SI 0 "register_operand" "=r")
8036 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8037 UNSPEC_MOVMSK))]
8038 "TARGET_SSE2"
8039 "%vpmovmskb\t{%1, %0|%0, %1}"
8040 [(set_attr "type" "ssemov")
8041 (set_attr "prefix_data16" "1")
8042 (set_attr "prefix" "maybe_vex")
8043 (set_attr "mode" "SI")])
8044
;; Expander for the masked store: the V16QI memory destination is set to
;; an UNSPEC_MASKMOV of two V16QI registers and the destination's own
;; previous contents (match_dup 0).  No preparation code.
8045 (define_expand "sse2_maskmovdqu"
8046 [(set (match_operand:V16QI 0 "memory_operand" "")
8047 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8048 (match_operand:V16QI 2 "register_operand" "")
8049 (match_dup 0)]
8050 UNSPEC_MASKMOV))]
8051 "TARGET_SSE2")
8052
;; 32-bit insn for the masked store.  The address is an SImode register
;; pinned by the "D" constraint and never appears in the output template,
;; which names only operands 1 and 2.  length_vex is fixed at 3 for the
;; reason given in the comment next to it.
8053 (define_insn "*sse2_maskmovdqu"
8054 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
8055 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8056 (match_operand:V16QI 2 "register_operand" "x")
8057 (mem:V16QI (match_dup 0))]
8058 UNSPEC_MASKMOV))]
8059 "TARGET_SSE2 && !TARGET_64BIT"
8060 ;; @@@ check ordering of operands in intel/nonintel syntax
8061 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8062 [(set_attr "type" "ssemov")
8063 (set_attr "prefix_data16" "1")
8064 ;; The implicit %rdi operand confuses default length_vex computation.
8065 (set_attr "length_vex" "3")
8066 (set_attr "prefix" "maybe_vex")
8067 (set_attr "mode" "TI")])
8068
;; 64-bit insn for the masked store.  Same shape as *sse2_maskmovdqu but
;; with a DImode address register.  length_vex is computed explicitly:
;; 3 + 1 when operand 2's register number is at or above
;; FIRST_REX_SSE_REG, otherwise 2 + 1.
8069 (define_insn "*sse2_maskmovdqu_rex64"
8070 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
8071 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8072 (match_operand:V16QI 2 "register_operand" "x")
8073 (mem:V16QI (match_dup 0))]
8074 UNSPEC_MASKMOV))]
8075 "TARGET_SSE2 && TARGET_64BIT"
8076 ;; @@@ check ordering of operands in intel/nonintel syntax
8077 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8078 [(set_attr "type" "ssemov")
8079 (set_attr "prefix_data16" "1")
8080 ;; The implicit %rdi operand confuses default length_vex computation.
8081 (set (attr "length_vex")
8082 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
8083 (set_attr "prefix" "maybe_vex")
8084 (set_attr "mode" "TI")])
8085
;; Volatile unspec UNSPECV_LDMXCSR taking one SImode memory operand;
;; emitted as ldmxcsr with an optional "v" prefix.  The memory attribute
;; is "load".
8086 (define_insn "sse_ldmxcsr"
8087 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8088 UNSPECV_LDMXCSR)]
8089 "TARGET_SSE"
8090 "%vldmxcsr\t%0"
8091 [(set_attr "type" "sse")
8092 (set_attr "atom_sse_attr" "mxcsr")
8093 (set_attr "prefix" "maybe_vex")
8094 (set_attr "memory" "load")])
8095
;; Sets an SImode memory operand from the volatile unspec
;; UNSPECV_STMXCSR; emitted as stmxcsr with an optional "v" prefix.  The
;; memory attribute is "store".
8096 (define_insn "sse_stmxcsr"
8097 [(set (match_operand:SI 0 "memory_operand" "=m")
8098 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8099 "TARGET_SSE"
8100 "%vstmxcsr\t%0"
8101 [(set_attr "type" "sse")
8102 (set_attr "atom_sse_attr" "mxcsr")
8103 (set_attr "prefix" "maybe_vex")
8104 (set_attr "memory" "store")])
8105
;; Expander for sfence.  The fence is modelled as a BLKmode memory
;; location set to an UNSPEC_SFENCE of itself.  The preparation code
;; builds that location as a BLKmode MEM whose address is a Pmode scratch
;; and marks it volatile.
8106 (define_expand "sse_sfence"
8107 [(set (match_dup 0)
8108 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8109 "TARGET_SSE || TARGET_3DNOW_A"
8110 {
8111 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8112 MEM_VOLATILE_P (operands[0]) = 1;
8113 })
8114
;; Insn matching the UNSPEC_SFENCE set; operand 0 has an empty predicate
;; and empty constraint.  Emits "sfence" with length_address 0 and
;; memory "unknown".
8115 (define_insn "*sse_sfence"
8116 [(set (match_operand:BLK 0 "" "")
8117 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8118 "TARGET_SSE || TARGET_3DNOW_A"
8119 "sfence"
8120 [(set_attr "type" "sse")
8121 (set_attr "length_address" "0")
8122 (set_attr "atom_sse_attr" "fence")
8123 (set_attr "memory" "unknown")])
8124
;; Volatile unspec UNSPECV_CLFLUSH taking an address operand; emitted as
;; clflush with the operand printed as an address (%a0).
8125 (define_insn "sse2_clflush"
8126 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8127 UNSPECV_CLFLUSH)]
8128 "TARGET_SSE2"
8129 "clflush\t%a0"
8130 [(set_attr "type" "sse")
8131 (set_attr "atom_sse_attr" "fence")
8132 (set_attr "memory" "unknown")])
8133
;; Expander for mfence.  The fence is modelled as a BLKmode memory
;; location set to an UNSPEC_MFENCE of itself.  The preparation code
;; builds that location as a BLKmode MEM whose address is a Pmode scratch
;; and marks it volatile.
8134 (define_expand "sse2_mfence"
8135 [(set (match_dup 0)
8136 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8137 "TARGET_SSE2"
8138 {
8139 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8140 MEM_VOLATILE_P (operands[0]) = 1;
8141 })
8142
;; Insn matching the UNSPEC_MFENCE set and emitting "mfence".  Its
;; condition (TARGET_64BIT || TARGET_SSE2) is wider than that of the
;; sse2_mfence expander, which requires TARGET_SSE2 alone.
8143 (define_insn "*sse2_mfence"
8144 [(set (match_operand:BLK 0 "" "")
8145 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8146 "TARGET_64BIT || TARGET_SSE2"
8147 "mfence"
8148 [(set_attr "type" "sse")
8149 (set_attr "length_address" "0")
8150 (set_attr "atom_sse_attr" "fence")
8151 (set_attr "memory" "unknown")])
8152
;; Expander for lfence.  The fence is modelled as a BLKmode memory
;; location set to an UNSPEC_LFENCE of itself.  The preparation code
;; builds that location as a BLKmode MEM whose address is a Pmode scratch
;; and marks it volatile.
8153 (define_expand "sse2_lfence"
8154 [(set (match_dup 0)
8155 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8156 "TARGET_SSE2"
8157 {
8158 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8159 MEM_VOLATILE_P (operands[0]) = 1;
8160 })
8161
;; Insn matching the UNSPEC_LFENCE set and emitting "lfence".  Unlike
;; the sfence and mfence insns, its atom_sse_attr is "lfence" rather
;; than "fence".
8162 (define_insn "*sse2_lfence"
8163 [(set (match_operand:BLK 0 "" "")
8164 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8165 "TARGET_SSE2"
8166 "lfence"
8167 [(set_attr "type" "sse")
8168 (set_attr "length_address" "0")
8169 (set_attr "atom_sse_attr" "lfence")
8170 (set_attr "memory" "unknown")])
8171
;; Volatile unspec UNSPECV_MWAIT with two SImode register operands pinned
;; by the "a" and "c" constraints.  The operands are implicit: the
;; template is a bare "mwait".  The length attribute is fixed at 3.
8172 (define_insn "sse3_mwait"
8173 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8174 (match_operand:SI 1 "register_operand" "c")]
8175 UNSPECV_MWAIT)]
8176 "TARGET_SSE3"
8177 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8178 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8179 ;; we only need to set up 32bit registers.
8180 "mwait"
8181 [(set_attr "length" "3")])
8182
;; monitor, 32-bit targets only.  All three inputs are SImode, pinned to
;; the "a", "c" and "d" register classes, and printed explicitly in the
;; template.
(define_insn "sse3_monitor"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
                     (match_operand:SI 1 "register_operand" "c")
                     (match_operand:SI 2 "register_operand" "d")]
                    UNSPECV_MONITOR)]
  "TARGET_SSE3 && !TARGET_64BIT"
  "monitor\t%0, %1, %2"
  [(set_attr "length" "3")])
8191
;; monitor, 64-bit targets only.  Operand 0 is DImode (class "a"); the
;; other two inputs stay SImode (classes "c" and "d").  The mnemonic is
;; emitted without explicit operands.
(define_insn "sse3_monitor64"
  [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
                     (match_operand:SI 1 "register_operand" "c")
                     (match_operand:SI 2 "register_operand" "d")]
                    UNSPECV_MONITOR)]
  "TARGET_SSE3 && TARGET_64BIT"
  ;; The 64-bit spelling is "monitor %rax,%rcx,%rdx", but only the low
  ;; 32 bits of RCX and RDX are consumed.  32-bit register writes are
  ;; implicitly zero-extended to 64 bits, so setting up the 32-bit
  ;; registers is sufficient for those two.
  "monitor"
  [(set_attr "length" "3")])
8203
8204 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8205 ;;
8206 ;; SSSE3 instructions
8207 ;;
8208 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8209
;; vphaddw (AVX, three-operand form): horizontal add of adjacent HImode
;; lanes.  Result lanes 0-3 are the pairwise sums of operand 1, lanes 4-7
;; the pairwise sums of operand 2.
(define_insn "*avx_phaddwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; Pairwise sums taken from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_operand:V8HI 1 "register_operand" "x")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pairwise sums taken from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX"
  "vphaddw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8253
;; phaddw (SSSE3, 128-bit, two-operand form): horizontal add of adjacent
;; HImode lanes.  Operand 1 is tied to the destination (constraint "0").
;; Result lanes 0-3 come from operand 1, lanes 4-7 from operand 2.
(define_insn "ssse3_phaddwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; Pairwise sums taken from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_operand:V8HI 1 "register_operand" "0")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pairwise sums taken from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phaddw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8298
;; phaddw (SSSE3, 64-bit V4HI form using "y" registers): horizontal add of
;; adjacent HImode lanes.  Operand 1 is tied to the destination.  Result
;; lanes 0-1 come from operand 1, lanes 2-3 from operand 2.
(define_insn "ssse3_phaddwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Pairwise sums taken from operand 1.
          (vec_concat:V2HI
            (plus:HI
              (vec_select:HI (match_operand:V4HI 1 "register_operand" "0")
                             (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (plus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairwise sums taken from operand 2.
          (vec_concat:V2HI
            (plus:HI
              (vec_select:HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                             (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (plus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; REX prefix presence is computed per insn from the registers used.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8327
;; vphaddd (AVX, three-operand form): horizontal add of adjacent SImode
;; lanes.  Result lanes 0-1 come from operand 1, lanes 2-3 from operand 2.
(define_insn "*avx_phadddv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          ;; Pairwise sums taken from operand 1.
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI (match_operand:V4SI 1 "register_operand" "x")
                             (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairwise sums taken from operand 2.
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                             (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_AVX"
  "vphaddd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8355
;; phaddd (SSSE3, 128-bit, two-operand form): horizontal add of adjacent
;; SImode lanes.  Operand 1 is tied to the destination.  Result lanes 0-1
;; come from operand 1, lanes 2-3 from operand 2.
(define_insn "ssse3_phadddv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          ;; Pairwise sums taken from operand 1.
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI (match_operand:V4SI 1 "register_operand" "0")
                             (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairwise sums taken from operand 2.
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                             (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8384
;; phaddd (SSSE3, 64-bit V2SI form using "y" registers).  Result lane 0 is
;; the sum of operand 1's two lanes, lane 1 the sum of operand 2's two
;; lanes.  Operand 1 is tied to the destination.
(define_insn "ssse3_phadddv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          (plus:SI
            (vec_select:SI (match_operand:V2SI 1 "register_operand" "0")
                           (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          (plus:SI
            (vec_select:SI (match_operand:V2SI 2 "nonimmediate_operand" "ym")
                           (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phaddd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; REX prefix presence is computed per insn from the registers used.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8405
;; vphaddsw (AVX, three-operand form): horizontal add of adjacent HImode
;; lanes with signed saturation (ss_plus).  Result lanes 0-3 come from
;; operand 1, lanes 4-7 from operand 2.
(define_insn "*avx_phaddswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; Saturating pairwise sums taken from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_operand:V8HI 1 "register_operand" "x")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Saturating pairwise sums taken from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX"
  "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8449
;; phaddsw (SSSE3, 128-bit, two-operand form): horizontal add of adjacent
;; HImode lanes with signed saturation (ss_plus).  Operand 1 is tied to the
;; destination.  Result lanes 0-3 come from operand 1, 4-7 from operand 2.
(define_insn "ssse3_phaddswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; Saturating pairwise sums taken from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_operand:V8HI 1 "register_operand" "0")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Saturating pairwise sums taken from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phaddsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8494
;; phaddsw (SSSE3, 64-bit V4HI form using "y" registers): horizontal add of
;; adjacent HImode lanes with signed saturation.  Operand 1 is tied to the
;; destination.  Result lanes 0-1 come from operand 1, 2-3 from operand 2.
(define_insn "ssse3_phaddswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Saturating pairwise sums taken from operand 1.
          (vec_concat:V2HI
            (ss_plus:HI
              (vec_select:HI (match_operand:V4HI 1 "register_operand" "0")
                             (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ss_plus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Saturating pairwise sums taken from operand 2.
          (vec_concat:V2HI
            (ss_plus:HI
              (vec_select:HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                             (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ss_plus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; REX prefix presence is computed per insn from the registers used.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8523
;; vphsubw (AVX, three-operand form): horizontal subtract of adjacent
;; HImode lanes; each result is (even lane - odd lane).  Result lanes 0-3
;; come from operand 1, lanes 4-7 from operand 2.
(define_insn "*avx_phsubwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; Pairwise differences taken from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_operand:V8HI 1 "register_operand" "x")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pairwise differences taken from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX"
  "vphsubw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8567
;; phsubw (SSSE3, 128-bit, two-operand form): horizontal subtract of
;; adjacent HImode lanes; each result is (even lane - odd lane).  Operand 1
;; is tied to the destination.  Result lanes 0-3 come from operand 1,
;; lanes 4-7 from operand 2.
(define_insn "ssse3_phsubwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; Pairwise differences taken from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_operand:V8HI 1 "register_operand" "0")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pairwise differences taken from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phsubw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8612
;; phsubw (SSSE3, 64-bit V4HI form using "y" registers): horizontal
;; subtract of adjacent HImode lanes; each result is (even lane - odd
;; lane).  Operand 1 is tied to the destination.
(define_insn "ssse3_phsubwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Pairwise differences taken from operand 1.
          (vec_concat:V2HI
            (minus:HI
              (vec_select:HI (match_operand:V4HI 1 "register_operand" "0")
                             (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (minus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairwise differences taken from operand 2.
          (vec_concat:V2HI
            (minus:HI
              (vec_select:HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                             (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (minus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; REX prefix presence is computed per insn from the registers used.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8641
;; vphsubd (AVX, three-operand form): horizontal subtract of adjacent
;; SImode lanes; each result is (even lane - odd lane).  Result lanes 0-1
;; come from operand 1, lanes 2-3 from operand 2.
(define_insn "*avx_phsubdv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          ;; Pairwise differences taken from operand 1.
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI (match_operand:V4SI 1 "register_operand" "x")
                             (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairwise differences taken from operand 2.
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                             (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_AVX"
  "vphsubd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8669
;; phsubd (SSSE3, 128-bit, two-operand form): horizontal subtract of
;; adjacent SImode lanes; each result is (even lane - odd lane).  Operand 1
;; is tied to the destination.
(define_insn "ssse3_phsubdv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          ;; Pairwise differences taken from operand 1.
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI (match_operand:V4SI 1 "register_operand" "0")
                             (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairwise differences taken from operand 2.
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                             (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8698
;; phsubd (SSSE3, 64-bit V2SI form using "y" registers).  Result lane 0 is
;; operand 1's lane 0 minus its lane 1; result lane 1 is the same
;; difference computed on operand 2.  Operand 1 is tied to the destination.
(define_insn "ssse3_phsubdv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          (minus:SI
            (vec_select:SI (match_operand:V2SI 1 "register_operand" "0")
                           (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          (minus:SI
            (vec_select:SI (match_operand:V2SI 2 "nonimmediate_operand" "ym")
                           (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phsubd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; REX prefix presence is computed per insn from the registers used.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8719
;; vphsubsw (AVX, three-operand form): horizontal subtract of adjacent
;; HImode lanes with signed saturation (ss_minus); each result is
;; (even lane - odd lane).  Result lanes 0-3 come from operand 1, lanes
;; 4-7 from operand 2.
(define_insn "*avx_phsubswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; Saturating pairwise differences taken from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_operand:V8HI 1 "register_operand" "x")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Saturating pairwise differences taken from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX"
  "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8763
;; phsubsw (SSSE3, 128-bit, two-operand form): horizontal subtract of
;; adjacent HImode lanes with signed saturation (ss_minus); each result is
;; (even lane - odd lane).  Operand 1 is tied to the destination.
(define_insn "ssse3_phsubswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; Saturating pairwise differences taken from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_operand:V8HI 1 "register_operand" "0")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Saturating pairwise differences taken from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                               (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phsubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8808
;; phsubsw (SSSE3, 64-bit V4HI form using "y" registers): horizontal
;; subtract of adjacent HImode lanes with signed saturation; each result is
;; (even lane - odd lane).  Operand 1 is tied to the destination.
(define_insn "ssse3_phsubswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Saturating pairwise differences taken from operand 1.
          (vec_concat:V2HI
            (ss_minus:HI
              (vec_select:HI (match_operand:V4HI 1 "register_operand" "0")
                             (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ss_minus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Saturating pairwise differences taken from operand 2.
          (vec_concat:V2HI
            (ss_minus:HI
              (vec_select:HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                             (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ss_minus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; REX prefix presence is computed per insn from the registers used.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8837
;; pmaddubsw / vpmaddubsw, 128-bit.  The bytes of operand 1 are
;; zero-extended and the bytes of operand 2 sign-extended to HImode; the
;; even-indexed products and the odd-indexed products are then added with
;; signed saturation (ss_plus), giving eight HImode results.
;;
;; Each vec_select picks eight bytes out of a V16QI source, so its result
;; mode is V8QI.  The AVX and SSSE3 patterns must describe byte-for-byte
;; the same RTL: the generator of the named "ssse3_pmaddubsw128" pattern
;; emits it, and the unnamed "*avx_pmaddubsw128" pattern recognizes it when
;; TARGET_AVX is enabled.  Keep the two in sync.
(define_insn "*avx_pmaddubsw128"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (ss_plus:V8HI
          ;; Products of the even-indexed bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 1 "register_operand" "x")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)])))
            (sign_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)]))))
          ;; Products of the odd-indexed bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)])))
            (sign_extend:V8HI
              (vec_select:V8QI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)]))))))]
  "TARGET_AVX"
  "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])

;; Two-operand SSSE3 form; operand 1 is tied to the destination.  The RTL
;; is identical to "*avx_pmaddubsw128" apart from that constraint.
(define_insn "ssse3_pmaddubsw128"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (ss_plus:V8HI
          ;; Products of the even-indexed bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 1 "register_operand" "0")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)])))
            (sign_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)]))))
          ;; Products of the odd-indexed bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)])))
            (sign_extend:V8HI
              (vec_select:V8QI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8946
;; pmaddubsw, 64-bit form using "y" registers.  The bytes of operand 1 are
;; zero-extended and the bytes of operand 2 sign-extended to HImode; the
;; even-indexed products and the odd-indexed products are then added with
;; signed saturation (ss_plus), giving four HImode results.  Operand 1 is
;; tied to the destination.
;;
;; Each vec_select picks four bytes out of a V8QI source, so its result
;; mode is V4QI in all four places.
(define_insn "ssse3_pmaddubsw"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (ss_plus:V4HI
          ;; Products of the even-indexed bytes.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 1 "register_operand" "0")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)])))
            (sign_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)]))))
          ;; Products of the odd-indexed bytes.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4HI
              (vec_select:V4QI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_extra" "1")
   ;; REX prefix presence is computed per insn from the registers used.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8985
;; Expander for pmulhrsw on V8HI.  The RTL is: sign-extend both inputs to
;; V8SI, multiply, shift right by 14, add a vector of ones, shift right by
;; 1, truncate back to V8HI.  Operands are canonicalized with
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
;;
;; NOTE(review): the vector of ones is spelled const_vector:V8HI although
;; it is an operand of plus:V8SI.  The matching insn patterns use the same
;; spelling, so any change here has to be made in all of them together.
(define_expand "ssse3_pmulhrswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (lshiftrt:V8SI
                (mult:V8SI
                  (sign_extend:V8SI
                    (match_operand:V8HI 1 "nonimmediate_operand" ""))
                  (sign_extend:V8SI
                    (match_operand:V8HI 2 "nonimmediate_operand" "")))
                (const_int 14))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3"
  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
9005
;; vpmulhrsw (AVX, three-operand form).  RTL: sign-extend both inputs to
;; V8SI, multiply, shift right by 14, add a vector of ones, shift right by
;; 1, truncate to V8HI.  Operand 1 is marked commutative ("%"); the insn
;; condition additionally requires ix86_binary_operator_ok.
(define_insn "*avx_pmulhrswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (lshiftrt:V8SI
                (mult:V8SI
                  (sign_extend:V8SI
                    (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
                  (sign_extend:V8SI
                    (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
                (const_int 14))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
9029
;; pmulhrsw (SSSE3, 128-bit, two-operand form).  RTL: sign-extend both
;; inputs to V8SI, multiply, shift right by 14, add a vector of ones,
;; shift right by 1, truncate to V8HI.  Operand 1 is commutative and tied
;; to the destination ("%0"); the insn condition additionally requires
;; ix86_binary_operator_ok.
(define_insn "*ssse3_pmulhrswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (lshiftrt:V8SI
                (mult:V8SI
                  (sign_extend:V8SI
                    (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
                  (sign_extend:V8SI
                    (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
                (const_int 14))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
9053
;; Expander for pmulhrsw on V4HI.  The RTL is: sign-extend both inputs to
;; V4SI, multiply, shift right by 14, add a vector of ones, shift right by
;; 1, truncate back to V4HI.  Operands are canonicalized with
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
;;
;; NOTE(review): the vector of ones is spelled const_vector:V4HI although
;; it is an operand of plus:V4SI.  The matching insn pattern uses the same
;; spelling, so any change here has to be made in both together.
(define_expand "ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "")
        (truncate:V4HI
          (lshiftrt:V4SI
            (plus:V4SI
              (lshiftrt:V4SI
                (mult:V4SI
                  (sign_extend:V4SI
                    (match_operand:V4HI 1 "nonimmediate_operand" ""))
                  (sign_extend:V4SI
                    (match_operand:V4HI 2 "nonimmediate_operand" "")))
                (const_int 14))
              (const_vector:V4HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3"
  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
9071
;; pmulhrsw (SSSE3, 64-bit V4HI form using "y" registers).  RTL:
;; sign-extend both inputs to V4SI, multiply, shift right by 14, add a
;; vector of ones, shift right by 1, truncate to V4HI.  Operand 1 is
;; commutative and tied to the destination ("%0"); the insn condition
;; additionally requires ix86_binary_operator_ok.
(define_insn "*ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (truncate:V4HI
          (lshiftrt:V4SI
            (plus:V4SI
              (lshiftrt:V4SI
                (mult:V4SI
                  (sign_extend:V4SI
                    (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
                  (sign_extend:V4SI
                    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
                (const_int 14))
              (const_vector:V4HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   ;; REX prefix presence is computed per insn from the registers used.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
9093
;; TARGET_AVX form of the V16QI byte shuffle, modeled as UNSPEC_PSHUFB.
;; Emits three-operand vpshufb: operand 1 must be a register, operand 2 may be
;; register or memory, and the destination is not tied to an input.
;; NOTE(review): the ';' after the output template only starts an empty
;; comment; it is harmless and could be dropped.
9094 (define_insn "*avx_pshufbv16qi3"
9095 [(set (match_operand:V16QI 0 "register_operand" "=x")
9096 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9097 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9098 UNSPEC_PSHUFB))]
9099 "TARGET_AVX"
9100 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
9101 [(set_attr "type" "sselog1")
9102 (set_attr "prefix_extra" "1")
9103 (set_attr "prefix" "vex")
9104 (set_attr "mode" "TI")])
9105
;; TARGET_SSSE3 form of the V16QI byte shuffle (UNSPEC_PSHUFB).
;; Emits two-operand pshufb: operand 1 is tied to the destination ("0") and
;; operand 2 may be register or memory.
;; NOTE(review): the ';' after the output template only starts an empty
;; comment; it is harmless and could be dropped.
9106 (define_insn "ssse3_pshufbv16qi3"
9107 [(set (match_operand:V16QI 0 "register_operand" "=x")
9108 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9109 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9110 UNSPEC_PSHUFB))]
9111 "TARGET_SSSE3"
9112 "pshufb\t{%2, %0|%0, %2}";
9113 [(set_attr "type" "sselog1")
9114 (set_attr "prefix_data16" "1")
9115 (set_attr "prefix_extra" "1")
9116 (set_attr "mode" "TI")])
9117
;; V8QI variant of the byte shuffle (UNSPEC_PSHUFB) using "y" register
;; constraints, enabled under TARGET_SSSE3.  Operand 1 is tied to the
;; destination; operand 2 may be register or memory.  prefix_rex is computed
;; per insn via x86_extended_reg_mentioned_p.
;; NOTE(review): the ';' after the output template only starts an empty
;; comment; it is harmless and could be dropped.
9118 (define_insn "ssse3_pshufbv8qi3"
9119 [(set (match_operand:V8QI 0 "register_operand" "=y")
9120 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9121 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9122 UNSPEC_PSHUFB))]
9123 "TARGET_SSSE3"
9124 "pshufb\t{%2, %0|%0, %2}";
9125 [(set_attr "type" "sselog1")
9126 (set_attr "prefix_extra" "1")
9127 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9128 (set_attr "mode" "DI")])
9129
;; TARGET_AVX psign pattern, instantiated for each mode of the SSEMODE124
;; iterator and modeled as UNSPEC_PSIGN.  Emits three-operand
;; vpsign<ssevecsize>; operand 1 is a register, operand 2 register or memory.
;; NOTE(review): the ';' after the output template only starts an empty
;; comment; it is harmless and could be dropped.
9130 (define_insn "*avx_psign<mode>3"
9131 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9132 (unspec:SSEMODE124
9133 [(match_operand:SSEMODE124 1 "register_operand" "x")
9134 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9135 UNSPEC_PSIGN))]
9136 "TARGET_AVX"
9137 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
9138 [(set_attr "type" "sselog1")
9139 (set_attr "prefix_extra" "1")
9140 (set_attr "prefix" "vex")
9141 (set_attr "mode" "TI")])
9142
;; TARGET_SSSE3 psign pattern over the SSEMODE124 iterator (UNSPEC_PSIGN).
;; Emits two-operand psign<ssevecsize>; operand 1 is tied to the destination
;; and operand 2 may be register or memory.
;; NOTE(review): the ';' after the output template only starts an empty
;; comment; it is harmless and could be dropped.
9143 (define_insn "ssse3_psign<mode>3"
9144 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9145 (unspec:SSEMODE124
9146 [(match_operand:SSEMODE124 1 "register_operand" "0")
9147 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9148 UNSPEC_PSIGN))]
9149 "TARGET_SSSE3"
9150 "psign<ssevecsize>\t{%2, %0|%0, %2}";
9151 [(set_attr "type" "sselog1")
9152 (set_attr "prefix_data16" "1")
9153 (set_attr "prefix_extra" "1")
9154 (set_attr "mode" "TI")])
9155
;; psign pattern over the MMXMODEI iterator (UNSPEC_PSIGN) using "y" register
;; constraints, enabled under TARGET_SSSE3.  Shares its name template with the
;; SSEMODE124 pattern; the two differ in the modes they are instantiated for.
;; Emits psign<mmxvecsize>; prefix_rex is computed per insn.
;; NOTE(review): the ';' after the output template only starts an empty
;; comment; it is harmless and could be dropped.
9156 (define_insn "ssse3_psign<mode>3"
9157 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9158 (unspec:MMXMODEI
9159 [(match_operand:MMXMODEI 1 "register_operand" "0")
9160 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9161 UNSPEC_PSIGN))]
9162 "TARGET_SSSE3"
9163 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9164 [(set_attr "type" "sselog1")
9165 (set_attr "prefix_extra" "1")
9166 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9167 (set_attr "mode" "DI")])
9168
;; TARGET_AVX palignr on TI-mode operands, modeled as UNSPEC_PALIGNR.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the C output block
;; divides it by 8 before printing it as the vpalignr immediate.
;; NOTE(review): presumably the RTL operand is a bit count and the printed
;; immediate a byte count -- confirm against the builtin expander.
9169 (define_insn "*avx_palignrti"
9170 [(set (match_operand:TI 0 "register_operand" "=x")
9171 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9172 (match_operand:TI 2 "nonimmediate_operand" "xm")
9173 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9174 UNSPEC_PALIGNR))]
9175 "TARGET_AVX"
9176 {
9177 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9178 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9179 }
9180 [(set_attr "type" "sseishft")
9181 (set_attr "prefix_extra" "1")
9182 (set_attr "length_immediate" "1")
9183 (set_attr "prefix" "vex")
9184 (set_attr "mode" "TI")])
9185
;; TARGET_SSSE3 palignr on TI-mode operands (UNSPEC_PALIGNR), two-operand
;; form with operand 1 tied to the destination.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the C output block divides it by 8 before
;; printing it as the palignr immediate.
;; NOTE(review): presumably the RTL operand is a bit count and the printed
;; immediate a byte count -- confirm against the builtin expander.
9186 (define_insn "ssse3_palignrti"
9187 [(set (match_operand:TI 0 "register_operand" "=x")
9188 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9189 (match_operand:TI 2 "nonimmediate_operand" "xm")
9190 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9191 UNSPEC_PALIGNR))]
9192 "TARGET_SSSE3"
9193 {
9194 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9195 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9196 }
9197 [(set_attr "type" "sseishft")
9198 (set_attr "atom_unit" "sishuf")
9199 (set_attr "prefix_data16" "1")
9200 (set_attr "prefix_extra" "1")
9201 (set_attr "length_immediate" "1")
9202 (set_attr "mode" "TI")])
9203
;; DI-mode palignr (UNSPEC_PALIGNR) using "y" register constraints, enabled
;; under TARGET_SSSE3.  Operand 1 is tied to the destination.  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand; the C output block divides it by 8
;; before printing it as the immediate.  prefix_rex is computed per insn.
;; NOTE(review): presumably the RTL operand is a bit count and the printed
;; immediate a byte count -- confirm against the builtin expander.
9204 (define_insn "ssse3_palignrdi"
9205 [(set (match_operand:DI 0 "register_operand" "=y")
9206 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9207 (match_operand:DI 2 "nonimmediate_operand" "ym")
9208 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9209 UNSPEC_PALIGNR))]
9210 "TARGET_SSSE3"
9211 {
9212 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9213 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9214 }
9215 [(set_attr "type" "sseishft")
9216 (set_attr "atom_unit" "sishuf")
9217 (set_attr "prefix_extra" "1")
9218 (set_attr "length_immediate" "1")
9219 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9220 (set_attr "mode" "DI")])
9221
;; Vector absolute value (RTL abs) over the SSEMODE124 iterator, enabled under
;; TARGET_SSSE3.  The source may be register or memory.  The template is
;; %vpabs<ssevecsize> and the prefix attribute is maybe_vex.
;; NOTE(review): %v presumably selects the VEX-encoded mnemonic when AVX is
;; enabled -- confirm in the operand-printing code.
9222 (define_insn "abs<mode>2"
9223 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9224 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9225 "TARGET_SSSE3"
9226 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9227 [(set_attr "type" "sselog1")
9228 (set_attr "prefix_data16" "1")
9229 (set_attr "prefix_extra" "1")
9230 (set_attr "prefix" "maybe_vex")
9231 (set_attr "mode" "TI")])
9232
;; Vector absolute value (RTL abs) over the MMXMODEI iterator using "y"
;; register constraints, enabled under TARGET_SSSE3.  Emits pabs<mmxvecsize>;
;; prefix_rep is forced to 0 and prefix_rex is computed per insn.
;; NOTE(review): the ';' after the output template only starts an empty
;; comment; it is harmless and could be dropped.
9233 (define_insn "abs<mode>2"
9234 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9235 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9236 "TARGET_SSSE3"
9237 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9238 [(set_attr "type" "sselog1")
9239 (set_attr "prefix_rep" "0")
9240 (set_attr "prefix_extra" "1")
9241 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9242 (set_attr "mode" "DI")])
9243
9244 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9245 ;;
9246 ;; AMD SSE4A instructions
9247 ;;
9248 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9249
;; TARGET_SSE4A store of a scalar register (MODEF iterator) to memory,
;; modeled as UNSPEC_MOVNT.  Emits movnts<ssemodefsuffix> with the memory
;; destination as operand 0 and the register source as operand 1.
9250 (define_insn "sse4a_movnt<mode>"
9251 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9252 (unspec:MODEF
9253 [(match_operand:MODEF 1 "register_operand" "x")]
9254 UNSPEC_MOVNT))]
9255 "TARGET_SSE4A"
9256 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9257 [(set_attr "type" "ssemov")
9258 (set_attr "mode" "<MODE>")])
9259
;; TARGET_SSE4A store of element 0 of a vector register (SSEMODEF2P iterator)
;; to a scalar memory location, modeled as UNSPEC_MOVNT around a vec_select.
;; Emits movnt<ssescalarmodesuffix>; the mode attribute is the scalar mode.
9260 (define_insn "sse4a_vmmovnt<mode>"
9261 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9262 (unspec:<ssescalarmode>
9263 [(vec_select:<ssescalarmode>
9264 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9265 (parallel [(const_int 0)]))]
9266 UNSPEC_MOVNT))]
9267 "TARGET_SSE4A"
9268 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9269 [(set_attr "type" "ssemov")
9270 (set_attr "mode" "<ssescalarmode>")])
9271
;; TARGET_SSE4A extrq with two immediates, modeled as UNSPEC_EXTRQI.
;; Operand 1 is tied to the destination; operands 2 and 3 are modeless
;; const_int operands with empty constraints, printed as the two immediates.
;; length_immediate is 2 to account for both immediate bytes.
9272 (define_insn "sse4a_extrqi"
9273 [(set (match_operand:V2DI 0 "register_operand" "=x")
9274 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9275 (match_operand 2 "const_int_operand" "")
9276 (match_operand 3 "const_int_operand" "")]
9277 UNSPEC_EXTRQI))]
9278 "TARGET_SSE4A"
9279 "extrq\t{%3, %2, %0|%0, %2, %3}"
9280 [(set_attr "type" "sse")
9281 (set_attr "prefix_data16" "1")
9282 (set_attr "length_immediate" "2")
9283 (set_attr "mode" "TI")])
9284
;; TARGET_SSE4A register form of extrq, modeled as UNSPEC_EXTRQ.
;; Operand 1 (V2DI) is tied to the destination; operand 2 is a V16QI register.
9285 (define_insn "sse4a_extrq"
9286 [(set (match_operand:V2DI 0 "register_operand" "=x")
9287 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9288 (match_operand:V16QI 2 "register_operand" "x")]
9289 UNSPEC_EXTRQ))]
9290 "TARGET_SSE4A"
9291 "extrq\t{%2, %0|%0, %2}"
9292 [(set_attr "type" "sse")
9293 (set_attr "prefix_data16" "1")
9294 (set_attr "mode" "TI")])
9295
;; TARGET_SSE4A insertq with two immediates, modeled as UNSPEC_INSERTQI.
;; Operand 1 is tied to the destination, operand 2 is a V2DI register, and
;; operands 3 and 4 are modeless const_int operands printed as immediates.
;; The attributes force prefix_data16 to 0 and prefix_rep to 1;
;; length_immediate is 2 to account for both immediate bytes.
9296 (define_insn "sse4a_insertqi"
9297 [(set (match_operand:V2DI 0 "register_operand" "=x")
9298 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9299 (match_operand:V2DI 2 "register_operand" "x")
9300 (match_operand 3 "const_int_operand" "")
9301 (match_operand 4 "const_int_operand" "")]
9302 UNSPEC_INSERTQI))]
9303 "TARGET_SSE4A"
9304 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9305 [(set_attr "type" "sseins")
9306 (set_attr "prefix_data16" "0")
9307 (set_attr "prefix_rep" "1")
9308 (set_attr "length_immediate" "2")
9309 (set_attr "mode" "TI")])
9310
;; TARGET_SSE4A register form of insertq, modeled as UNSPEC_INSERTQ.
;; Operand 1 is tied to the destination; operand 2 is a V2DI register.
;; The attributes force prefix_data16 to 0 and prefix_rep to 1.
9311 (define_insn "sse4a_insertq"
9312 [(set (match_operand:V2DI 0 "register_operand" "=x")
9313 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9314 (match_operand:V2DI 2 "register_operand" "x")]
9315 UNSPEC_INSERTQ))]
9316 "TARGET_SSE4A"
9317 "insertq\t{%2, %0|%0, %2}"
9318 [(set_attr "type" "sseins")
9319 (set_attr "prefix_data16" "0")
9320 (set_attr "prefix_rep" "1")
9321 (set_attr "mode" "TI")])
9322
9323 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9324 ;;
9325 ;; Intel SSE4.1 instructions
9326 ;;
9327 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9328
;; TARGET_AVX immediate blend over the AVXMODEF2P iterator, expressed as a
;; vec_merge whose first input is operand 2 (register or memory), second
;; input is operand 1 (register) and whose mask is immediate operand 3
;; (predicate const_0_to_<blendbits>_operand).  Emits vblend<ssemodesuffix>.
9329 (define_insn "avx_blend<ssemodesuffix><avxmodesuffix>"
9330 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9331 (vec_merge:AVXMODEF2P
9332 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9333 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9334 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9335 "TARGET_AVX"
9336 "vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9337 [(set_attr "type" "ssemov")
9338 (set_attr "prefix_extra" "1")
9339 (set_attr "length_immediate" "1")
9340 (set_attr "prefix" "vex")
9341 (set_attr "mode" "<avxvecmode>")])
9342
;; TARGET_AVX variable blend over the AVXMODEF2P iterator, modeled as
;; UNSPEC_BLENDV.  Operand 3 is a register (not an immediate); the template
;; prints all four operands for vblendv<ssemodesuffix>.
;; NOTE(review): length_immediate is 1 although operand 3 is a register --
;; presumably the register is encoded in an immediate byte; confirm.
9343 (define_insn "avx_blendv<ssemodesuffix><avxmodesuffix>"
9344 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9345 (unspec:AVXMODEF2P
9346 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9347 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9348 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9349 UNSPEC_BLENDV))]
9350 "TARGET_AVX"
9351 "vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9352 [(set_attr "type" "ssemov")
9353 (set_attr "prefix_extra" "1")
9354 (set_attr "length_immediate" "1")
9355 (set_attr "prefix" "vex")
9356 (set_attr "mode" "<avxvecmode>")])
9357
;; TARGET_SSE4_1 immediate blend over the SSEMODEF2P iterator, expressed as a
;; vec_merge of operand 2 (register or memory) and operand 1 (tied to the
;; destination) under immediate mask operand 3.  Emits blend<ssemodesuffix>.
9358 (define_insn "sse4_1_blend<ssemodesuffix>"
9359 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9360 (vec_merge:SSEMODEF2P
9361 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9362 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9363 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9364 "TARGET_SSE4_1"
9365 "blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9366 [(set_attr "type" "ssemov")
9367 (set_attr "prefix_data16" "1")
9368 (set_attr "prefix_extra" "1")
9369 (set_attr "length_immediate" "1")
9370 (set_attr "mode" "<MODE>")])
9371
;; TARGET_SSE4_1 variable blend over the SSEMODEF2P iterator, modeled as
;; UNSPEC_BLENDV.  Operands 0, 1 and 2 use the *_not_xmm0 predicates and
;; operand 1 is tied to the destination; operand 3 uses constraint "Yz".
;; NOTE(review): "Yz" presumably pins the selector to xmm0, which would
;; explain the not-xmm0 predicates on the other operands -- confirm in the
;; constraint definitions.
9372 (define_insn "sse4_1_blendv<ssemodesuffix>"
9373 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9374 (unspec:SSEMODEF2P
9375 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9376 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9377 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9378 UNSPEC_BLENDV))]
9379 "TARGET_SSE4_1"
9380 "blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9381 [(set_attr "type" "ssemov")
9382 (set_attr "prefix_data16" "1")
9383 (set_attr "prefix_extra" "1")
9384 (set_attr "mode" "<MODE>")])
9385
;; TARGET_AVX dot-product pattern over the AVXMODEF2P iterator, modeled as
;; UNSPEC_DP with an immediate operand 3 in the range accepted by
;; const_0_to_255_operand.  Operand 1 carries the commutative marker ("%x").
;; Emits vdp<ssemodesuffix>.
9386 (define_insn "avx_dp<ssemodesuffix><avxmodesuffix>"
9387 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9388 (unspec:AVXMODEF2P
9389 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9390 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9391 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9392 UNSPEC_DP))]
9393 "TARGET_AVX"
9394 "vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9395 [(set_attr "type" "ssemul")
9396 (set_attr "prefix" "vex")
9397 (set_attr "prefix_extra" "1")
9398 (set_attr "length_immediate" "1")
9399 (set_attr "mode" "<avxvecmode>")])
9400
;; TARGET_SSE4_1 dot-product pattern over the SSEMODEF2P iterator, modeled as
;; UNSPEC_DP with an immediate operand 3 (const_0_to_255_operand).
;; Operand 1 is tied to the destination and marked commutative ("%0").
;; Emits dp<ssemodesuffix>.
9401 (define_insn "sse4_1_dp<ssemodesuffix>"
9402 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9403 (unspec:SSEMODEF2P
9404 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9405 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9406 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9407 UNSPEC_DP))]
9408 "TARGET_SSE4_1"
9409 "dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9410 [(set_attr "type" "ssemul")
9411 (set_attr "prefix_data16" "1")
9412 (set_attr "prefix_extra" "1")
9413 (set_attr "length_immediate" "1")
9414 (set_attr "mode" "<MODE>")])
9415
;; TARGET_SSE4_1 load of a V2DI value from memory into a register, modeled as
;; UNSPEC_MOVNTDQA.  The source must be a memory operand.  The template is
;; %vmovntdqa with prefix maybe_vex.
9416 (define_insn "sse4_1_movntdqa"
9417 [(set (match_operand:V2DI 0 "register_operand" "=x")
9418 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9419 UNSPEC_MOVNTDQA))]
9420 "TARGET_SSE4_1"
9421 "%vmovntdqa\t{%1, %0|%0, %1}"
9422 [(set_attr "type" "ssemov")
9423 (set_attr "prefix_extra" "1")
9424 (set_attr "prefix" "maybe_vex")
9425 (set_attr "mode" "TI")])
9426
;; TARGET_AVX mpsadbw on V16QI operands, modeled as UNSPEC_MPSADBW with an
;; immediate operand 3 (const_0_to_255_operand).  Emits three-source-operand
;; vmpsadbw; operand 2 may be register or memory.
9427 (define_insn "*avx_mpsadbw"
9428 [(set (match_operand:V16QI 0 "register_operand" "=x")
9429 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9430 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9431 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9432 UNSPEC_MPSADBW))]
9433 "TARGET_AVX"
9434 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9435 [(set_attr "type" "sselog1")
9436 (set_attr "prefix" "vex")
9437 (set_attr "prefix_extra" "1")
9438 (set_attr "length_immediate" "1")
9439 (set_attr "mode" "TI")])
9440
;; TARGET_SSE4_1 mpsadbw on V16QI operands, modeled as UNSPEC_MPSADBW with an
;; immediate operand 3 (const_0_to_255_operand).  Operand 1 is tied to the
;; destination; operand 2 may be register or memory.
9441 (define_insn "sse4_1_mpsadbw"
9442 [(set (match_operand:V16QI 0 "register_operand" "=x")
9443 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9444 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9445 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9446 UNSPEC_MPSADBW))]
9447 "TARGET_SSE4_1"
9448 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9449 [(set_attr "type" "sselog1")
9450 (set_attr "prefix_extra" "1")
9451 (set_attr "length_immediate" "1")
9452 (set_attr "mode" "TI")])
9453
;; TARGET_AVX packusdw: the V8HI result is the vec_concat of the
;; unsigned-saturating truncations (us_truncate) of V4SI operands 1 and 2 to
;; V4HI.  Operand 1 is a register, operand 2 register or memory.
9454 (define_insn "*avx_packusdw"
9455 [(set (match_operand:V8HI 0 "register_operand" "=x")
9456 (vec_concat:V8HI
9457 (us_truncate:V4HI
9458 (match_operand:V4SI 1 "register_operand" "x"))
9459 (us_truncate:V4HI
9460 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9461 "TARGET_AVX"
9462 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9463 [(set_attr "type" "sselog")
9464 (set_attr "prefix_extra" "1")
9465 (set_attr "prefix" "vex")
9466 (set_attr "mode" "TI")])
9467
;; TARGET_SSE4_1 packusdw: the V8HI result is the vec_concat of the
;; unsigned-saturating truncations (us_truncate) of V4SI operands 1 and 2 to
;; V4HI.  Operand 1 is tied to the destination; operand 2 may be memory.
9468 (define_insn "sse4_1_packusdw"
9469 [(set (match_operand:V8HI 0 "register_operand" "=x")
9470 (vec_concat:V8HI
9471 (us_truncate:V4HI
9472 (match_operand:V4SI 1 "register_operand" "0"))
9473 (us_truncate:V4HI
9474 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9475 "TARGET_SSE4_1"
9476 "packusdw\t{%2, %0|%0, %2}"
9477 [(set_attr "type" "sselog")
9478 (set_attr "prefix_extra" "1")
9479 (set_attr "mode" "TI")])
9480
;; TARGET_AVX variable byte blend on V16QI operands, modeled as UNSPEC_BLENDV.
;; Operand 3 is a register; the template prints all four operands for
;; vpblendvb.
;; NOTE(review): length_immediate is 1 although operand 3 is a register --
;; presumably the register is encoded in an immediate byte; confirm.
9481 (define_insn "*avx_pblendvb"
9482 [(set (match_operand:V16QI 0 "register_operand" "=x")
9483 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9484 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9485 (match_operand:V16QI 3 "register_operand" "x")]
9486 UNSPEC_BLENDV))]
9487 "TARGET_AVX"
9488 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9489 [(set_attr "type" "ssemov")
9490 (set_attr "prefix_extra" "1")
9491 (set_attr "length_immediate" "1")
9492 (set_attr "prefix" "vex")
9493 (set_attr "mode" "TI")])
9494
;; TARGET_SSE4_1 variable byte blend on V16QI operands, modeled as
;; UNSPEC_BLENDV.  Operands 0, 1 and 2 use the *_not_xmm0 predicates and
;; operand 1 is tied to the destination; operand 3 uses constraint "Yz".
;; NOTE(review): "Yz" presumably pins the selector to xmm0 -- confirm in the
;; constraint definitions.
9495 (define_insn "sse4_1_pblendvb"
9496 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9497 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9498 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9499 (match_operand:V16QI 3 "register_operand" "Yz")]
9500 UNSPEC_BLENDV))]
9501 "TARGET_SSE4_1"
9502 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9503 [(set_attr "type" "ssemov")
9504 (set_attr "prefix_extra" "1")
9505 (set_attr "mode" "TI")])
9506
;; TARGET_AVX immediate word blend: a V8HI vec_merge whose first input is
;; operand 2 (register or memory), second input is operand 1 (register) and
;; whose mask is immediate operand 3 (const_0_to_255_operand).
;; Emits vpblendw.
9507 (define_insn "*avx_pblendw"
9508 [(set (match_operand:V8HI 0 "register_operand" "=x")
9509 (vec_merge:V8HI
9510 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9511 (match_operand:V8HI 1 "register_operand" "x")
9512 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9513 "TARGET_AVX"
9514 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9515 [(set_attr "type" "ssemov")
9516 (set_attr "prefix" "vex")
9517 (set_attr "prefix_extra" "1")
9518 (set_attr "length_immediate" "1")
9519 (set_attr "mode" "TI")])
9520
;; TARGET_SSE4_1 immediate word blend: a V8HI vec_merge of operand 2
;; (register or memory) and operand 1 (tied to the destination) under
;; immediate mask operand 3 (const_0_to_255_operand).  Emits pblendw.
9521 (define_insn "sse4_1_pblendw"
9522 [(set (match_operand:V8HI 0 "register_operand" "=x")
9523 (vec_merge:V8HI
9524 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9525 (match_operand:V8HI 1 "register_operand" "0")
9526 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9527 "TARGET_SSE4_1"
9528 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9529 [(set_attr "type" "ssemov")
9530 (set_attr "prefix_extra" "1")
9531 (set_attr "length_immediate" "1")
9532 (set_attr "mode" "TI")])
9533
;; TARGET_SSE4_1 phminposuw on a V8HI operand, modeled as UNSPEC_PHMINPOSUW.
;; The single source may be register or memory.  The template is
;; %vphminposuw with prefix maybe_vex.
9534 (define_insn "sse4_1_phminposuw"
9535 [(set (match_operand:V8HI 0 "register_operand" "=x")
9536 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9537 UNSPEC_PHMINPOSUW))]
9538 "TARGET_SSE4_1"
9539 "%vphminposuw\t{%1, %0|%0, %1}"
9540 [(set_attr "type" "sselog1")
9541 (set_attr "prefix_extra" "1")
9542 (set_attr "prefix" "maybe_vex")
9543 (set_attr "mode" "TI")])
9544
;; TARGET_SSE4_1 byte-to-word extension: sign- or zero-extends (any_extend)
;; elements 0..7 of V16QI operand 1 to a V8HI result.  <extsuffix> expands to
;; "sx" or "zx" per the extsuffix code attribute, giving %vpmovsxbw or
;; %vpmovzxbw.
9545 (define_insn "sse4_1_<code>v8qiv8hi2"
9546 [(set (match_operand:V8HI 0 "register_operand" "=x")
9547 (any_extend:V8HI
9548 (vec_select:V8QI
9549 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9550 (parallel [(const_int 0)
9551 (const_int 1)
9552 (const_int 2)
9553 (const_int 3)
9554 (const_int 4)
9555 (const_int 5)
9556 (const_int 6)
9557 (const_int 7)]))))]
9558 "TARGET_SSE4_1"
9559 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9560 [(set_attr "type" "ssemov")
9561 (set_attr "prefix_extra" "1")
9562 (set_attr "prefix" "maybe_vex")
9563 (set_attr "mode" "TI")])
9564
;; TARGET_SSE4_1 byte-to-doubleword extension: sign- or zero-extends
;; (any_extend) elements 0..3 of V16QI operand 1 to a V4SI result.
;; Emits %vpmov<extsuffix>bd, where <extsuffix> is "sx" or "zx".
9565 (define_insn "sse4_1_<code>v4qiv4si2"
9566 [(set (match_operand:V4SI 0 "register_operand" "=x")
9567 (any_extend:V4SI
9568 (vec_select:V4QI
9569 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9570 (parallel [(const_int 0)
9571 (const_int 1)
9572 (const_int 2)
9573 (const_int 3)]))))]
9574 "TARGET_SSE4_1"
9575 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9576 [(set_attr "type" "ssemov")
9577 (set_attr "prefix_extra" "1")
9578 (set_attr "prefix" "maybe_vex")
9579 (set_attr "mode" "TI")])
9580
;; TARGET_SSE4_1 word-to-doubleword extension: sign- or zero-extends
;; (any_extend) elements 0..3 of V8HI operand 1 to a V4SI result.
;; Emits %vpmov<extsuffix>wd, where <extsuffix> is "sx" or "zx".
9581 (define_insn "sse4_1_<code>v4hiv4si2"
9582 [(set (match_operand:V4SI 0 "register_operand" "=x")
9583 (any_extend:V4SI
9584 (vec_select:V4HI
9585 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9586 (parallel [(const_int 0)
9587 (const_int 1)
9588 (const_int 2)
9589 (const_int 3)]))))]
9590 "TARGET_SSE4_1"
9591 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9592 [(set_attr "type" "ssemov")
9593 (set_attr "prefix_extra" "1")
9594 (set_attr "prefix" "maybe_vex")
9595 (set_attr "mode" "TI")])
9596
;; TARGET_SSE4_1 byte-to-quadword extension: sign- or zero-extends
;; (any_extend) elements 0..1 of V16QI operand 1 to a V2DI result.
;; Emits %vpmov<extsuffix>bq, where <extsuffix> is "sx" or "zx".
9597 (define_insn "sse4_1_<code>v2qiv2di2"
9598 [(set (match_operand:V2DI 0 "register_operand" "=x")
9599 (any_extend:V2DI
9600 (vec_select:V2QI
9601 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9602 (parallel [(const_int 0)
9603 (const_int 1)]))))]
9604 "TARGET_SSE4_1"
9605 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9606 [(set_attr "type" "ssemov")
9607 (set_attr "prefix_extra" "1")
9608 (set_attr "prefix" "maybe_vex")
9609 (set_attr "mode" "TI")])
9610
;; TARGET_SSE4_1 word-to-quadword extension: sign- or zero-extends
;; (any_extend) elements 0..1 of V8HI operand 1 to a V2DI result.
;; Emits %vpmov<extsuffix>wq, where <extsuffix> is "sx" or "zx".
9611 (define_insn "sse4_1_<code>v2hiv2di2"
9612 [(set (match_operand:V2DI 0 "register_operand" "=x")
9613 (any_extend:V2DI
9614 (vec_select:V2HI
9615 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9616 (parallel [(const_int 0)
9617 (const_int 1)]))))]
9618 "TARGET_SSE4_1"
9619 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9620 [(set_attr "type" "ssemov")
9621 (set_attr "prefix_extra" "1")
9622 (set_attr "prefix" "maybe_vex")
9623 (set_attr "mode" "TI")])
9624
;; TARGET_SSE4_1 doubleword-to-quadword extension: sign- or zero-extends
;; (any_extend) elements 0..1 of V4SI operand 1 to a V2DI result.
;; Emits %vpmov<extsuffix>dq, where <extsuffix> is "sx" or "zx".
9625 (define_insn "sse4_1_<code>v2siv2di2"
9626 [(set (match_operand:V2DI 0 "register_operand" "=x")
9627 (any_extend:V2DI
9628 (vec_select:V2SI
9629 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9630 (parallel [(const_int 0)
9631 (const_int 1)]))))]
9632 "TARGET_SSE4_1"
9633 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9634 [(set_attr "type" "ssemov")
9635 (set_attr "prefix_extra" "1")
9636 (set_attr "prefix" "maybe_vex")
9637 (set_attr "mode" "TI")])
9638
9639 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9640 ;; setting FLAGS_REG, but they are not really compare instructions.
;; TARGET_AVX test pattern over the AVXMODEF2P iterator: sets FLAGS_REG (CC
;; mode) from UNSPEC_VTESTP of register operand 0 and register-or-memory
;; operand 1.  There is no vector result.  Emits vtest<ssemodesuffix>.
9641 (define_insn "avx_vtest<ssemodesuffix><avxmodesuffix>"
9642 [(set (reg:CC FLAGS_REG)
9643 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9644 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9645 UNSPEC_VTESTP))]
9646 "TARGET_AVX"
9647 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9648 [(set_attr "type" "ssecomi")
9649 (set_attr "prefix_extra" "1")
9650 (set_attr "prefix" "vex")
9651 (set_attr "mode" "<MODE>")])
9652
9653 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
9654 ;; but it is not really a compare instruction.
;; TARGET_AVX ptest on V4DI operands: sets FLAGS_REG (CC mode) from
;; UNSPEC_PTEST of register operand 0 and register-or-memory operand 1.
;; Emits vptest; the mode attribute is OI.
9655 (define_insn "avx_ptest256"
9656 [(set (reg:CC FLAGS_REG)
9657 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9658 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9659 UNSPEC_PTEST))]
9660 "TARGET_AVX"
9661 "vptest\t{%1, %0|%0, %1}"
9662 [(set_attr "type" "ssecomi")
9663 (set_attr "prefix_extra" "1")
9664 (set_attr "prefix" "vex")
9665 (set_attr "mode" "OI")])
9666
;; TARGET_SSE4_1 ptest on V2DI operands: sets FLAGS_REG (CC mode) from
;; UNSPEC_PTEST of register operand 0 and register-or-memory operand 1.
;; The template is %vptest with prefix maybe_vex.
9667 (define_insn "sse4_1_ptest"
9668 [(set (reg:CC FLAGS_REG)
9669 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9670 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9671 UNSPEC_PTEST))]
9672 "TARGET_SSE4_1"
9673 "%vptest\t{%1, %0|%0, %1}"
9674 [(set_attr "type" "ssecomi")
9675 (set_attr "prefix_extra" "1")
9676 (set_attr "prefix" "maybe_vex")
9677 (set_attr "mode" "TI")])
9678
;; TARGET_AVX packed rounding over the AVX256MODEF2P iterator, modeled as
;; UNSPEC_ROUND of operand 1 (register or memory) with an immediate operand 2
;; accepted by const_0_to_15_operand.  Emits vround<ssemodesuffix>.
9679 (define_insn "avx_round<ssemodesuffix>256"
9680 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9681 (unspec:AVX256MODEF2P
9682 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9683 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9684 UNSPEC_ROUND))]
9685 "TARGET_AVX"
9686 "vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9687 [(set_attr "type" "ssecvt")
9688 (set_attr "prefix_extra" "1")
9689 (set_attr "length_immediate" "1")
9690 (set_attr "prefix" "vex")
9691 (set_attr "mode" "<MODE>")])
9692
;; Packed rounding over the SSEMODEF2P iterator, modeled as UNSPEC_ROUND of
;; operand 1 (register or memory) with an immediate operand 2 accepted by
;; const_0_to_15_operand.  Note the condition is TARGET_ROUND rather than
;; TARGET_SSE4_1.  The template is %vround<ssemodesuffix> with prefix
;; maybe_vex.
9693 (define_insn "sse4_1_round<ssemodesuffix>"
9694 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9695 (unspec:SSEMODEF2P
9696 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9697 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9698 UNSPEC_ROUND))]
9699 "TARGET_ROUND"
9700 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9701 [(set_attr "type" "ssecvt")
9702 (set_attr "prefix_data16" "1")
9703 (set_attr "prefix_extra" "1")
9704 (set_attr "length_immediate" "1")
9705 (set_attr "prefix" "maybe_vex")
9706 (set_attr "mode" "<MODE>")])
9707
;; TARGET_AVX scalar rounding over the SSEMODEF2P iterator.  The result is a
;; vec_merge, with mask (const_int 1), of UNSPEC_ROUND applied to register
;; operand 2 (rounding immediate in operand 3) and register operand 1.
;; Emits four-operand vround<ssescalarmodesuffix>.
9708 (define_insn "*avx_round<ssescalarmodesuffix>"
9709 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9710 (vec_merge:SSEMODEF2P
9711 (unspec:SSEMODEF2P
9712 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9713 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9714 UNSPEC_ROUND)
9715 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9716 (const_int 1)))]
9717 "TARGET_AVX"
9718 "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9719 [(set_attr "type" "ssecvt")
9720 (set_attr "prefix_extra" "1")
9721 (set_attr "length_immediate" "1")
9722 (set_attr "prefix" "vex")
9723 (set_attr "mode" "<MODE>")])
9724
;; Scalar rounding over the SSEMODEF2P iterator, enabled under TARGET_ROUND.
;; The result is a vec_merge, with mask (const_int 1), of UNSPEC_ROUND applied
;; to register operand 2 (rounding immediate in operand 3) and operand 1,
;; which is tied to the destination.  Emits round<ssescalarmodesuffix>.
9725 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9726 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9727 (vec_merge:SSEMODEF2P
9728 (unspec:SSEMODEF2P
9729 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9730 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9731 UNSPEC_ROUND)
9732 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9733 (const_int 1)))]
9734 "TARGET_ROUND"
9735 "round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9736 [(set_attr "type" "ssecvt")
9737 (set_attr "prefix_data16" "1")
9738 (set_attr "prefix_extra" "1")
9739 (set_attr "length_immediate" "1")
9740 (set_attr "mode" "<MODE>")])
9741
9742 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9743 ;;
9744 ;; Intel SSE4.2 string/text processing instructions
9745 ;;
9746 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9747
;; Combined pcmpestr pattern that is always split (output template "#").
;; It models three results of UNSPEC_PCMPESTR over operands 2..6: an SI value
;; in operand 0 (constraint "c"), a V16QI value in operand 1 (constraint
;; "Yz") and FLAGS_REG.  The insn only matches while can_create_pseudo_p ()
;; holds; the split condition "&& 1" adds nothing further.
;; The split code inspects REG_UNUSED notes on curr_insn and emits:
;;   - sse4_2_pcmpestri when operand 0 is not marked unused,
;;   - sse4_2_pcmpestrm when operand 1 is not marked unused,
;;   - sse4_2_pcmpestr_cconly only when FLAGS_REG is not marked unused and
;;     neither of the other two results is used.
;; Nothing is emitted when all three results are marked unused.
9748 (define_insn_and_split "sse4_2_pcmpestr"
9749 [(set (match_operand:SI 0 "register_operand" "=c,c")
9750 (unspec:SI
9751 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9752 (match_operand:SI 3 "register_operand" "a,a")
9753 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9754 (match_operand:SI 5 "register_operand" "d,d")
9755 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9756 UNSPEC_PCMPESTR))
9757 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9758 (unspec:V16QI
9759 [(match_dup 2)
9760 (match_dup 3)
9761 (match_dup 4)
9762 (match_dup 5)
9763 (match_dup 6)]
9764 UNSPEC_PCMPESTR))
9765 (set (reg:CC FLAGS_REG)
9766 (unspec:CC
9767 [(match_dup 2)
9768 (match_dup 3)
9769 (match_dup 4)
9770 (match_dup 5)
9771 (match_dup 6)]
9772 UNSPEC_PCMPESTR))]
9773 "TARGET_SSE4_2
9774 && can_create_pseudo_p ()"
9775 "#"
9776 "&& 1"
9777 [(const_int 0)]
9778 {
9779 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9780 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9781 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9782
9783 if (ecx)
9784 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9785 operands[3], operands[4],
9786 operands[5], operands[6]));
9787 if (xmm0)
9788 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9789 operands[3], operands[4],
9790 operands[5], operands[6]));
9791 if (flags && !(ecx || xmm0))
9792 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9793 operands[2], operands[3],
9794 operands[4], operands[5],
9795 operands[6]));
9796 DONE;
9797 }
9798 [(set_attr "type" "sselog")
9799 (set_attr "prefix_data16" "1")
9800 (set_attr "prefix_extra" "1")
9801 (set_attr "length_immediate" "1")
9802 (set_attr "memory" "none,load")
9803 (set_attr "mode" "TI")])
9804
;; TARGET_SSE4_2 pcmpestri: produces an SI result in operand 0 (constraint
;; "c") and sets FLAGS_REG, both as UNSPEC_PCMPESTR of operands 1..5.
;; Operands 2 and 4 are SI registers constrained to "a" and "d"; they are not
;; printed in the template.  Two alternatives: operand 3 in a register
;; (memory attribute "none") or in memory ("load").
9805 (define_insn "sse4_2_pcmpestri"
9806 [(set (match_operand:SI 0 "register_operand" "=c,c")
9807 (unspec:SI
9808 [(match_operand:V16QI 1 "register_operand" "x,x")
9809 (match_operand:SI 2 "register_operand" "a,a")
9810 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9811 (match_operand:SI 4 "register_operand" "d,d")
9812 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9813 UNSPEC_PCMPESTR))
9814 (set (reg:CC FLAGS_REG)
9815 (unspec:CC
9816 [(match_dup 1)
9817 (match_dup 2)
9818 (match_dup 3)
9819 (match_dup 4)
9820 (match_dup 5)]
9821 UNSPEC_PCMPESTR))]
9822 "TARGET_SSE4_2"
9823 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9824 [(set_attr "type" "sselog")
9825 (set_attr "prefix_data16" "1")
9826 (set_attr "prefix_extra" "1")
9827 (set_attr "prefix" "maybe_vex")
9828 (set_attr "length_immediate" "1")
9829 (set_attr "memory" "none,load")
9830 (set_attr "mode" "TI")])
9831
;; TARGET_SSE4_2 pcmpestrm: produces a V16QI result in operand 0 (constraint
;; "Yz") and sets FLAGS_REG, both as UNSPEC_PCMPESTR of operands 1..5.
;; Operands 2 and 4 are SI registers constrained to "a" and "d"; they are not
;; printed in the template.  Two alternatives: operand 3 in a register
;; (memory attribute "none") or in memory ("load").
9832 (define_insn "sse4_2_pcmpestrm"
9833 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9834 (unspec:V16QI
9835 [(match_operand:V16QI 1 "register_operand" "x,x")
9836 (match_operand:SI 2 "register_operand" "a,a")
9837 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9838 (match_operand:SI 4 "register_operand" "d,d")
9839 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9840 UNSPEC_PCMPESTR))
9841 (set (reg:CC FLAGS_REG)
9842 (unspec:CC
9843 [(match_dup 1)
9844 (match_dup 2)
9845 (match_dup 3)
9846 (match_dup 4)
9847 (match_dup 5)]
9848 UNSPEC_PCMPESTR))]
9849 "TARGET_SSE4_2"
9850 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9851 [(set_attr "type" "sselog")
9852 (set_attr "prefix_data16" "1")
9853 (set_attr "prefix_extra" "1")
9854 (set_attr "length_immediate" "1")
9855 (set_attr "prefix" "maybe_vex")
9856 (set_attr "memory" "none,load")
9857 (set_attr "mode" "TI")])
9858
;; TARGET_SSE4_2 flags-only pcmpestr: sets FLAGS_REG from UNSPEC_PCMPESTR of
;; operands 2..6 and clobbers two scratch operands (V16QI operand 0 and SI
;; operand 1).  Four alternatives: the first two allocate the V16QI scratch
;; ("Yz") and emit %vpcmpestrm, the last two allocate the SI scratch ("c")
;; and emit %vpcmpestri; within each pair operand 4 is a register, then
;; memory (memory attribute "none,load,none,load").
9859 (define_insn "sse4_2_pcmpestr_cconly"
9860 [(set (reg:CC FLAGS_REG)
9861 (unspec:CC
9862 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9863 (match_operand:SI 3 "register_operand" "a,a,a,a")
9864 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9865 (match_operand:SI 5 "register_operand" "d,d,d,d")
9866 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9867 UNSPEC_PCMPESTR))
9868 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9869 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9870 "TARGET_SSE4_2"
9871 "@
9872 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9873 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9874 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9875 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9876 [(set_attr "type" "sselog")
9877 (set_attr "prefix_data16" "1")
9878 (set_attr "prefix_extra" "1")
9879 (set_attr "length_immediate" "1")
9880 (set_attr "memory" "none,load,none,load")
9881 (set_attr "prefix" "maybe_vex")
9882 (set_attr "mode" "TI")])
9883
;; Combined pcmpistr pattern that is always split (output template "#").
;; It models three results of UNSPEC_PCMPISTR over operands 2..4: an SI value
;; in operand 0 (constraint "c"), a V16QI value in operand 1 (constraint
;; "Yz") and FLAGS_REG.  The insn only matches while can_create_pseudo_p ()
;; holds; the split condition "&& 1" adds nothing further.
;; The split code inspects REG_UNUSED notes on curr_insn and emits:
;;   - sse4_2_pcmpistri when operand 0 is not marked unused,
;;   - sse4_2_pcmpistrm when operand 1 is not marked unused,
;;   - sse4_2_pcmpistr_cconly only when FLAGS_REG is not marked unused and
;;     neither of the other two results is used.
;; Nothing is emitted when all three results are marked unused.
9884 (define_insn_and_split "sse4_2_pcmpistr"
9885 [(set (match_operand:SI 0 "register_operand" "=c,c")
9886 (unspec:SI
9887 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9888 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
9889 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9890 UNSPEC_PCMPISTR))
9891 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9892 (unspec:V16QI
9893 [(match_dup 2)
9894 (match_dup 3)
9895 (match_dup 4)]
9896 UNSPEC_PCMPISTR))
9897 (set (reg:CC FLAGS_REG)
9898 (unspec:CC
9899 [(match_dup 2)
9900 (match_dup 3)
9901 (match_dup 4)]
9902 UNSPEC_PCMPISTR))]
9903 "TARGET_SSE4_2
9904 && can_create_pseudo_p ()"
9905 "#"
9906 "&& 1"
9907 [(const_int 0)]
9908 {
9909 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9910 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9911 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9912
9913 if (ecx)
9914 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
9915 operands[3], operands[4]));
9916 if (xmm0)
9917 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
9918 operands[3], operands[4]));
9919 if (flags && !(ecx || xmm0))
9920 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
9921 operands[2], operands[3],
9922 operands[4]));
9923 DONE;
9924 }
9925 [(set_attr "type" "sselog")
9926 (set_attr "prefix_data16" "1")
9927 (set_attr "prefix_extra" "1")
9928 (set_attr "length_immediate" "1")
9929 (set_attr "memory" "none,load")
9930 (set_attr "mode" "TI")])
9931
;; TARGET_SSE4_2 pcmpistri: produces an SI result in operand 0 (constraint
;; "c") and sets FLAGS_REG, both as UNSPEC_PCMPISTR of operands 1..3.
;; Two alternatives: operand 2 in a register (memory attribute "none") or in
;; memory ("load").
9932 (define_insn "sse4_2_pcmpistri"
9933 [(set (match_operand:SI 0 "register_operand" "=c,c")
9934 (unspec:SI
9935 [(match_operand:V16QI 1 "register_operand" "x,x")
9936 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9937 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9938 UNSPEC_PCMPISTR))
9939 (set (reg:CC FLAGS_REG)
9940 (unspec:CC
9941 [(match_dup 1)
9942 (match_dup 2)
9943 (match_dup 3)]
9944 UNSPEC_PCMPISTR))]
9945 "TARGET_SSE4_2"
9946 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
9947 [(set_attr "type" "sselog")
9948 (set_attr "prefix_data16" "1")
9949 (set_attr "prefix_extra" "1")
9950 (set_attr "length_immediate" "1")
9951 (set_attr "prefix" "maybe_vex")
9952 (set_attr "memory" "none,load")
9953 (set_attr "mode" "TI")])
9954
;; TARGET_SSE4_2 pcmpistrm: produces a V16QI result in operand 0 (constraint
;; "Yz") and sets FLAGS_REG, both as UNSPEC_PCMPISTR of operands 1..3.
;; Two alternatives: operand 2 in a register (memory attribute "none") or in
;; memory ("load").
9955 (define_insn "sse4_2_pcmpistrm"
9956 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9957 (unspec:V16QI
9958 [(match_operand:V16QI 1 "register_operand" "x,x")
9959 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9960 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9961 UNSPEC_PCMPISTR))
9962 (set (reg:CC FLAGS_REG)
9963 (unspec:CC
9964 [(match_dup 1)
9965 (match_dup 2)
9966 (match_dup 3)]
9967 UNSPEC_PCMPISTR))]
9968 "TARGET_SSE4_2"
9969 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
9970 [(set_attr "type" "sselog")
9971 (set_attr "prefix_data16" "1")
9972 (set_attr "prefix_extra" "1")
9973 (set_attr "length_immediate" "1")
9974 (set_attr "prefix" "maybe_vex")
9975 (set_attr "memory" "none,load")
9976 (set_attr "mode" "TI")])
9977
;; TARGET_SSE4_2 flags-only pcmpistr: sets FLAGS_REG from UNSPEC_PCMPISTR of
;; operands 2..4 and clobbers two scratch operands (V16QI operand 0 and SI
;; operand 1).  Four alternatives: the first two allocate the V16QI scratch
;; ("Yz") and emit %vpcmpistrm, the last two allocate the SI scratch ("c")
;; and emit %vpcmpistri; within each pair operand 3 is a register, then
;; memory (memory attribute "none,load,none,load").
9978 (define_insn "sse4_2_pcmpistr_cconly"
9979 [(set (reg:CC FLAGS_REG)
9980 (unspec:CC
9981 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9982 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
9983 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
9984 UNSPEC_PCMPISTR))
9985 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9986 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9987 "TARGET_SSE4_2"
9988 "@
9989 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9990 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
9991 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
9992 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
9993 [(set_attr "type" "sselog")
9994 (set_attr "prefix_data16" "1")
9995 (set_attr "prefix_extra" "1")
9996 (set_attr "length_immediate" "1")
9997 (set_attr "memory" "none,load,none,load")
9998 (set_attr "prefix" "maybe_vex")
9999 (set_attr "mode" "TI")])
10000
10001 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10002 ;;
10003 ;; XOP instructions
10004 ;;
10005 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10006
10007 ;; XOP parallel integer multiply/add instructions.
10008 ;; Note the XOP multiply/add instructions
10009 ;; a[i] = b[i] * c[i] + d[i];
10010 ;; do not allow the value being added to be a memory operation.
;; V8HI multiply/accumulate with a plain (wrapping) add:
;;   operand 0 = operand 1 * operand 2 + operand 3.
;; Operand 1 is commutative with operand 2 ("%"); per the constraints only
;; operand 2 may be a memory reference.
10011 (define_insn "xop_pmacsww"
10012 [(set (match_operand:V8HI 0 "register_operand" "=x")
10013 (plus:V8HI
10014 (mult:V8HI
10015 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10016 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10017 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10018 "TARGET_XOP"
10019 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10020 [(set_attr "type" "ssemuladd")
10021 (set_attr "mode" "TI")])
10022
;; V8HI multiply/accumulate where the accumulation is a signed saturating
;; add (ss_plus): operand 0 = ss_plus (operand 1 * operand 2, operand 3).
;; Per the constraints only operand 2 may be a memory reference.
10023 (define_insn "xop_pmacssww"
10024 [(set (match_operand:V8HI 0 "register_operand" "=x")
10025 (ss_plus:V8HI
10026 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10027 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10028 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10029 "TARGET_XOP"
10030 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10031 [(set_attr "type" "ssemuladd")
10032 (set_attr "mode" "TI")])
10033
;; V4SI multiply/accumulate with a plain (wrapping) add:
;;   operand 0 = operand 1 * operand 2 + operand 3.
;; Per the constraints only operand 2 may be a memory reference.
10034 (define_insn "xop_pmacsdd"
10035 [(set (match_operand:V4SI 0 "register_operand" "=x")
10036 (plus:V4SI
10037 (mult:V4SI
10038 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10039 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10040 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10041 "TARGET_XOP"
10042 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10043 [(set_attr "type" "ssemuladd")
10044 (set_attr "mode" "TI")])
10045
;; V4SI multiply/accumulate where the accumulation is a signed saturating
;; add (ss_plus): operand 0 = ss_plus (operand 1 * operand 2, operand 3).
;; Per the constraints only operand 2 may be a memory reference.
10046 (define_insn "xop_pmacssdd"
10047 [(set (match_operand:V4SI 0 "register_operand" "=x")
10048 (ss_plus:V4SI
10049 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10050 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10051 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10052 "TARGET_XOP"
10053 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10054 [(set_attr "type" "ssemuladd")
10055 (set_attr "mode" "TI")])
10056
;; Widening multiply/accumulate with signed saturation: elements 1 and 3
;; of V4SI operands 1 and 2 are each sign-extended to V2DI, multiplied,
;; and added to V2DI operand 3 with ss_plus.
;; Both multiplicands must be sign-extended so that the mult:V2DI has two
;; V2DI inputs; a bare vec_select:V2SI as the second multiplicand is a
;; mode mismatch.
10057 (define_insn "xop_pmacssdql"
10058 [(set (match_operand:V2DI 0 "register_operand" "=x")
10059 (ss_plus:V2DI
10060 (mult:V2DI
10061 (sign_extend:V2DI
10062 (vec_select:V2SI
10063 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10064 (parallel [(const_int 1)
10065 (const_int 3)])))
10066 (sign_extend:V2DI (vec_select:V2SI
10067 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10068 (parallel [(const_int 1)
10069 (const_int 3)]))))
10070 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10071 "TARGET_XOP"
10072 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10073 [(set_attr "type" "ssemuladd")
10074 (set_attr "mode" "TI")])
10075
;; Widening multiply/accumulate with signed saturation: elements 0 and 2
;; of V4SI operands 1 and 2 are sign-extended to V2DI, multiplied, and
;; added to V2DI operand 3 with ss_plus.
10076 (define_insn "xop_pmacssdqh"
10077 [(set (match_operand:V2DI 0 "register_operand" "=x")
10078 (ss_plus:V2DI
10079 (mult:V2DI
10080 (sign_extend:V2DI
10081 (vec_select:V2SI
10082 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10083 (parallel [(const_int 0)
10084 (const_int 2)])))
10085 (sign_extend:V2DI
10086 (vec_select:V2SI
10087 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10088 (parallel [(const_int 0)
10089 (const_int 2)]))))
10090 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10091 "TARGET_XOP"
10092 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10093 [(set_attr "type" "ssemuladd")
10094 (set_attr "mode" "TI")])
10095
;; Widening multiply/accumulate with a plain add: elements 1 and 3 of V4SI
;; operands 1 and 2 are sign-extended to V2DI, multiplied, and added to
;; V2DI operand 3.  The post-reload split of xop_mulv2div2di3_low
;; generates exactly this RTL shape, so the two must stay in sync.
10096 (define_insn "xop_pmacsdql"
10097 [(set (match_operand:V2DI 0 "register_operand" "=x")
10098 (plus:V2DI
10099 (mult:V2DI
10100 (sign_extend:V2DI
10101 (vec_select:V2SI
10102 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10103 (parallel [(const_int 1)
10104 (const_int 3)])))
10105 (sign_extend:V2DI
10106 (vec_select:V2SI
10107 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10108 (parallel [(const_int 1)
10109 (const_int 3)]))))
10110 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10111 "TARGET_XOP"
10112 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10113 [(set_attr "type" "ssemuladd")
10114 (set_attr "mode" "TI")])
10115
10116 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10117 ;; fake it with a multiply/add. In general, we expect the define_split to
10118 ;; occur before register allocation, so we have to handle the corner case where
10119 ;; the target is the same as operands 1/2
;; Widening multiply of elements 1 and 3 of two V4SI operands into V2DI.
;; The insn itself emits "#" and is split once reload_completed holds:
;; first operand 0 is set to operand 3, which the preparation code below
;; fills in as CONST0_RTX (V2DImode); then operand 0 is set to the
;; multiply/add of operands 1 and 2 accumulated onto operand 0.
;; Operand 0 is early-clobber ("=&x") because the split writes it before
;; operands 1 and 2 are read.
10120 (define_insn_and_split "xop_mulv2div2di3_low"
10121 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10122 (mult:V2DI
10123 (sign_extend:V2DI
10124 (vec_select:V2SI
10125 (match_operand:V4SI 1 "register_operand" "%x")
10126 (parallel [(const_int 1)
10127 (const_int 3)])))
10128 (sign_extend:V2DI
10129 (vec_select:V2SI
10130 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10131 (parallel [(const_int 1)
10132 (const_int 3)])))))]
10133 "TARGET_XOP"
10134 "#"
10135 "&& reload_completed"
10136 [(set (match_dup 0)
10137 (match_dup 3))
10138 (set (match_dup 0)
10139 (plus:V2DI
10140 (mult:V2DI
10141 (sign_extend:V2DI
10142 (vec_select:V2SI
10143 (match_dup 1)
10144 (parallel [(const_int 1)
10145 (const_int 3)])))
10146 (sign_extend:V2DI
10147 (vec_select:V2SI
10148 (match_dup 2)
10149 (parallel [(const_int 1)
10150 (const_int 3)]))))
10151 (match_dup 0)))]
10152 {
10153 operands[3] = CONST0_RTX (V2DImode);
10154 }
10155 [(set_attr "type" "ssemul")
10156 (set_attr "mode" "TI")])
10157
;; Widening multiply/accumulate with a plain add: elements 0 and 2 of V4SI
;; operands 1 and 2 are sign-extended to V2DI, multiplied, and added to
;; V2DI operand 3.  The post-reload split of xop_mulv2div2di3_high
;; generates exactly this RTL shape, so the two must stay in sync.
10158 (define_insn "xop_pmacsdqh"
10159 [(set (match_operand:V2DI 0 "register_operand" "=x")
10160 (plus:V2DI
10161 (mult:V2DI
10162 (sign_extend:V2DI
10163 (vec_select:V2SI
10164 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10165 (parallel [(const_int 0)
10166 (const_int 2)])))
10167 (sign_extend:V2DI
10168 (vec_select:V2SI
10169 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10170 (parallel [(const_int 0)
10171 (const_int 2)]))))
10172 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10173 "TARGET_XOP"
10174 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10175 [(set_attr "type" "ssemuladd")
10176 (set_attr "mode" "TI")])
10177
10178 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10179 ;; fake it with a multiply/add. In general, we expect the define_split to
10180 ;; occur before register allocation, so we have to handle the corner case where
10181 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply of elements 0 and 2 of two V4SI operands into V2DI.
;; The insn itself emits "#" and is split once reload_completed holds:
;; first operand 0 is set to operand 3, which the preparation code below
;; fills in as CONST0_RTX (V2DImode); then operand 0 is set to the
;; multiply/add of operands 1 and 2 accumulated onto operand 0.
;; Operand 0 is early-clobber ("=&x") because the split writes it before
;; operands 1 and 2 are read.
10182 (define_insn_and_split "xop_mulv2div2di3_high"
10183 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10184 (mult:V2DI
10185 (sign_extend:V2DI
10186 (vec_select:V2SI
10187 (match_operand:V4SI 1 "register_operand" "%x")
10188 (parallel [(const_int 0)
10189 (const_int 2)])))
10190 (sign_extend:V2DI
10191 (vec_select:V2SI
10192 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10193 (parallel [(const_int 0)
10194 (const_int 2)])))))]
10195 "TARGET_XOP"
10196 "#"
10197 "&& reload_completed"
10198 [(set (match_dup 0)
10199 (match_dup 3))
10200 (set (match_dup 0)
10201 (plus:V2DI
10202 (mult:V2DI
10203 (sign_extend:V2DI
10204 (vec_select:V2SI
10205 (match_dup 1)
10206 (parallel [(const_int 0)
10207 (const_int 2)])))
10208 (sign_extend:V2DI
10209 (vec_select:V2SI
10210 (match_dup 2)
10211 (parallel [(const_int 0)
10212 (const_int 2)]))))
10213 (match_dup 0)))]
10214 {
10215 operands[3] = CONST0_RTX (V2DImode);
10216 }
10217 [(set_attr "type" "ssemul")
10218 (set_attr "mode" "TI")])
10219
10220 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; Widening multiply/accumulate with signed saturation: the odd-indexed
;; elements (1, 3, 5, 7) of V8HI operands 1 and 2 are sign-extended to
;; V4SI, multiplied, and added to V4SI operand 3 with ss_plus.
10221 (define_insn "xop_pmacsswd"
10222 [(set (match_operand:V4SI 0 "register_operand" "=x")
10223 (ss_plus:V4SI
10224 (mult:V4SI
10225 (sign_extend:V4SI
10226 (vec_select:V4HI
10227 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10228 (parallel [(const_int 1)
10229 (const_int 3)
10230 (const_int 5)
10231 (const_int 7)])))
10232 (sign_extend:V4SI
10233 (vec_select:V4HI
10234 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10235 (parallel [(const_int 1)
10236 (const_int 3)
10237 (const_int 5)
10238 (const_int 7)]))))
10239 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10240 "TARGET_XOP"
10241 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10242 [(set_attr "type" "ssemuladd")
10243 (set_attr "mode" "TI")])
10244
;; Widening multiply/accumulate with a plain add: the odd-indexed elements
;; (1, 3, 5, 7) of V8HI operands 1 and 2 are sign-extended to V4SI,
;; multiplied, and added to V4SI operand 3.
10245 (define_insn "xop_pmacswd"
10246 [(set (match_operand:V4SI 0 "register_operand" "=x")
10247 (plus:V4SI
10248 (mult:V4SI
10249 (sign_extend:V4SI
10250 (vec_select:V4HI
10251 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10252 (parallel [(const_int 1)
10253 (const_int 3)
10254 (const_int 5)
10255 (const_int 7)])))
10256 (sign_extend:V4SI
10257 (vec_select:V4HI
10258 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10259 (parallel [(const_int 1)
10260 (const_int 3)
10261 (const_int 5)
10262 (const_int 7)]))))
10263 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10264 "TARGET_XOP"
10265 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10266 [(set_attr "type" "ssemuladd")
10267 (set_attr "mode" "TI")])
10268
;; Multiply, add adjacent products, then accumulate with signed
;; saturation.  The sign-extended even-indexed elements (0, 2, 4, 6) of
;; V8HI operands 1 and 2 are multiplied, likewise the odd-indexed elements
;; (1, 3, 5, 7); the two V4SI products are added with a plain plus, and
;; that sum is combined with V4SI operand 3 using ss_plus.
10269 (define_insn "xop_pmadcsswd"
10270 [(set (match_operand:V4SI 0 "register_operand" "=x")
10271 (ss_plus:V4SI
10272 (plus:V4SI
10273 (mult:V4SI
10274 (sign_extend:V4SI
10275 (vec_select:V4HI
10276 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10277 (parallel [(const_int 0)
10278 (const_int 2)
10279 (const_int 4)
10280 (const_int 6)])))
10281 (sign_extend:V4SI
10282 (vec_select:V4HI
10283 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10284 (parallel [(const_int 0)
10285 (const_int 2)
10286 (const_int 4)
10287 (const_int 6)]))))
10288 (mult:V4SI
10289 (sign_extend:V4SI
10290 (vec_select:V4HI
10291 (match_dup 1)
10292 (parallel [(const_int 1)
10293 (const_int 3)
10294 (const_int 5)
10295 (const_int 7)])))
10296 (sign_extend:V4SI
10297 (vec_select:V4HI
10298 (match_dup 2)
10299 (parallel [(const_int 1)
10300 (const_int 3)
10301 (const_int 5)
10302 (const_int 7)])))))
10303 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10304 "TARGET_XOP"
10305 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10306 [(set_attr "type" "ssemuladd")
10307 (set_attr "mode" "TI")])
10308
;; Multiply, add adjacent products, then accumulate with a plain add.
;; The sign-extended even-indexed elements (0, 2, 4, 6) of V8HI operands 1
;; and 2 are multiplied, likewise the odd-indexed elements (1, 3, 5, 7);
;; the two V4SI products are added together and then to V4SI operand 3.
10309 (define_insn "xop_pmadcswd"
10310 [(set (match_operand:V4SI 0 "register_operand" "=x")
10311 (plus:V4SI
10312 (plus:V4SI
10313 (mult:V4SI
10314 (sign_extend:V4SI
10315 (vec_select:V4HI
10316 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10317 (parallel [(const_int 0)
10318 (const_int 2)
10319 (const_int 4)
10320 (const_int 6)])))
10321 (sign_extend:V4SI
10322 (vec_select:V4HI
10323 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10324 (parallel [(const_int 0)
10325 (const_int 2)
10326 (const_int 4)
10327 (const_int 6)]))))
10328 (mult:V4SI
10329 (sign_extend:V4SI
10330 (vec_select:V4HI
10331 (match_dup 1)
10332 (parallel [(const_int 1)
10333 (const_int 3)
10334 (const_int 5)
10335 (const_int 7)])))
10336 (sign_extend:V4SI
10337 (vec_select:V4HI
10338 (match_dup 2)
10339 (parallel [(const_int 1)
10340 (const_int 3)
10341 (const_int 5)
10342 (const_int 7)])))))
10343 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10344 "TARGET_XOP"
10345 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10346 [(set_attr "type" "ssemuladd")
10347 (set_attr "mode" "TI")])
10348
10349 ;; XOP parallel XMM conditional moves
;; 16-byte conditional move over every SSEMODE vector mode: operand 0 is
;; the if_then_else of operand 3 (the condition) choosing between operand 1
;; and operand 2.  The two alternatives allow a memory reference in either
;; operand 2 (alternative 0) or operand 3 (alternative 1), never both.
10350 (define_insn "xop_pcmov_<mode>"
10351 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
10352 (if_then_else:SSEMODE
10353 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
10354 (match_operand:SSEMODE 1 "vector_move_operand" "x,x")
10355 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))]
10356 "TARGET_XOP"
10357 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10358 [(set_attr "type" "sse4arg")])
10359
;; 32-byte conditional move over every AVX256MODE vector mode: operand 0
;; is the if_then_else of operand 3 (the condition) choosing between
;; operand 1 and operand 2.  The two alternatives allow a memory reference
;; in either operand 2 (alternative 0) or operand 3 (alternative 1), never
;; both.
10360 (define_insn "xop_pcmov_<mode>256"
10361 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
10362 (if_then_else:AVX256MODE
10363 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
10364 (match_operand:AVX256MODE 1 "vector_move_operand" "x,x")
10365 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))]
10366 "TARGET_XOP"
10367 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10368 [(set_attr "type" "sse4arg")])
10369
10370 ;; XOP horizontal add/subtract instructions
;; Signed horizontal add, bytes to words: result element i of the V8HI
;; destination is the sum of sign-extended bytes 2i and 2i+1 of V16QI
;; operand 1 (register or memory).
10371 (define_insn "xop_phaddbw"
10372 [(set (match_operand:V8HI 0 "register_operand" "=x")
10373 (plus:V8HI
10374 (sign_extend:V8HI
10375 (vec_select:V8QI
10376 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10377 (parallel [(const_int 0)
10378 (const_int 2)
10379 (const_int 4)
10380 (const_int 6)
10381 (const_int 8)
10382 (const_int 10)
10383 (const_int 12)
10384 (const_int 14)])))
10385 (sign_extend:V8HI
10386 (vec_select:V8QI
10387 (match_dup 1)
10388 (parallel [(const_int 1)
10389 (const_int 3)
10390 (const_int 5)
10391 (const_int 7)
10392 (const_int 9)
10393 (const_int 11)
10394 (const_int 13)
10395 (const_int 15)])))))]
10396 "TARGET_XOP"
10397 "vphaddbw\t{%1, %0|%0, %1}"
10398 [(set_attr "type" "sseiadd1")])
10399
;; Signed horizontal add, bytes to doublewords: result element i of the
;; V4SI destination is the sum of sign-extended bytes 4i .. 4i+3 of V16QI
;; operand 1.  Each vec_select picks one byte per result element (byte
;; 4i+k for the k-th select).
10400 (define_insn "xop_phaddbd"
10401 [(set (match_operand:V4SI 0 "register_operand" "=x")
10402 (plus:V4SI
10403 (plus:V4SI
10404 (sign_extend:V4SI
10405 (vec_select:V4QI
10406 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10407 (parallel [(const_int 0)
10408 (const_int 4)
10409 (const_int 8)
10410 (const_int 12)])))
10411 (sign_extend:V4SI
10412 (vec_select:V4QI
10413 (match_dup 1)
10414 (parallel [(const_int 1)
10415 (const_int 5)
10416 (const_int 9)
10417 (const_int 13)]))))
10418 (plus:V4SI
10419 (sign_extend:V4SI
10420 (vec_select:V4QI
10421 (match_dup 1)
10422 (parallel [(const_int 2)
10423 (const_int 6)
10424 (const_int 10)
10425 (const_int 14)])))
10426 (sign_extend:V4SI
10427 (vec_select:V4QI
10428 (match_dup 1)
10429 (parallel [(const_int 3)
10430 (const_int 7)
10431 (const_int 11)
10432 (const_int 15)]))))))]
10433 "TARGET_XOP"
10434 "vphaddbd\t{%1, %0|%0, %1}"
10435 [(set_attr "type" "sseiadd1")])
10436
;; Signed horizontal add, bytes to quadwords: result element 0 of the V2DI
;; destination is the sum of sign-extended bytes 0..7 of V16QI operand 1,
;; and result element 1 is the sum of sign-extended bytes 8..15.
;; Each vec_select contributes one byte to each result element, so the
;; k-th select (k = 0..7) must pick bytes [k, k+8].  Pairing them as
;; [k, k+4] would mix bytes 8..11 into element 0 and bytes 4..7 into
;; element 1.
10437 (define_insn "xop_phaddbq"
10438 [(set (match_operand:V2DI 0 "register_operand" "=x")
10439 (plus:V2DI
10440 (plus:V2DI
10441 (plus:V2DI
10442 (sign_extend:V2DI
10443 (vec_select:V2QI
10444 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10445 (parallel [(const_int 0)
10446 (const_int 8)])))
10447 (sign_extend:V2DI
10448 (vec_select:V2QI
10449 (match_dup 1)
10450 (parallel [(const_int 1)
10451 (const_int 9)]))))
10452 (plus:V2DI
10453 (sign_extend:V2DI
10454 (vec_select:V2QI
10455 (match_dup 1)
10456 (parallel [(const_int 2)
10457 (const_int 10)])))
10458 (sign_extend:V2DI
10459 (vec_select:V2QI
10460 (match_dup 1)
10461 (parallel [(const_int 3)
10462 (const_int 11)])))))
10463 (plus:V2DI
10464 (plus:V2DI
10465 (sign_extend:V2DI
10466 (vec_select:V2QI
10467 (match_dup 1)
10468 (parallel [(const_int 4)
10469 (const_int 12)])))
10470 (sign_extend:V2DI
10471 (vec_select:V2QI
10472 (match_dup 1)
10473 (parallel [(const_int 5)
10474 (const_int 13)]))))
10475 (plus:V2DI
10476 (sign_extend:V2DI
10477 (vec_select:V2QI
10478 (match_dup 1)
10479 (parallel [(const_int 6)
10480 (const_int 14)])))
10481 (sign_extend:V2DI
10482 (vec_select:V2QI
10483 (match_dup 1)
10484 (parallel [(const_int 7)
10485 (const_int 15)])))))))]
10486 "TARGET_XOP"
10487 "vphaddbq\t{%1, %0|%0, %1}"
10488 [(set_attr "type" "sseiadd1")])
10489
;; Signed horizontal add, words to doublewords: result element i of the
;; V4SI destination is the sum of sign-extended words 2i and 2i+1 of V8HI
;; operand 1.
10490 (define_insn "xop_phaddwd"
10491 [(set (match_operand:V4SI 0 "register_operand" "=x")
10492 (plus:V4SI
10493 (sign_extend:V4SI
10494 (vec_select:V4HI
10495 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10496 (parallel [(const_int 0)
10497 (const_int 2)
10498 (const_int 4)
10499 (const_int 6)])))
10500 (sign_extend:V4SI
10501 (vec_select:V4HI
10502 (match_dup 1)
10503 (parallel [(const_int 1)
10504 (const_int 3)
10505 (const_int 5)
10506 (const_int 7)])))))]
10507 "TARGET_XOP"
10508 "vphaddwd\t{%1, %0|%0, %1}"
10509 [(set_attr "type" "sseiadd1")])
10510
;; Signed horizontal add, words to quadwords: result element i of the V2DI
;; destination is the sum of sign-extended words 4i .. 4i+3 of V8HI
;; operand 1.  Each vec_select picks one word per result element.
10511 (define_insn "xop_phaddwq"
10512 [(set (match_operand:V2DI 0 "register_operand" "=x")
10513 (plus:V2DI
10514 (plus:V2DI
10515 (sign_extend:V2DI
10516 (vec_select:V2HI
10517 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10518 (parallel [(const_int 0)
10519 (const_int 4)])))
10520 (sign_extend:V2DI
10521 (vec_select:V2HI
10522 (match_dup 1)
10523 (parallel [(const_int 1)
10524 (const_int 5)]))))
10525 (plus:V2DI
10526 (sign_extend:V2DI
10527 (vec_select:V2HI
10528 (match_dup 1)
10529 (parallel [(const_int 2)
10530 (const_int 6)])))
10531 (sign_extend:V2DI
10532 (vec_select:V2HI
10533 (match_dup 1)
10534 (parallel [(const_int 3)
10535 (const_int 7)]))))))]
10536 "TARGET_XOP"
10537 "vphaddwq\t{%1, %0|%0, %1}"
10538 [(set_attr "type" "sseiadd1")])
10539
;; Signed horizontal add, doublewords to quadwords: result element i of
;; the V2DI destination is the sum of sign-extended doublewords 2i and
;; 2i+1 of V4SI operand 1.
10540 (define_insn "xop_phadddq"
10541 [(set (match_operand:V2DI 0 "register_operand" "=x")
10542 (plus:V2DI
10543 (sign_extend:V2DI
10544 (vec_select:V2SI
10545 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10546 (parallel [(const_int 0)
10547 (const_int 2)])))
10548 (sign_extend:V2DI
10549 (vec_select:V2SI
10550 (match_dup 1)
10551 (parallel [(const_int 1)
10552 (const_int 3)])))))]
10553 "TARGET_XOP"
10554 "vphadddq\t{%1, %0|%0, %1}"
10555 [(set_attr "type" "sseiadd1")])
10556
;; Unsigned horizontal add, bytes to words: result element i of the V8HI
;; destination is the sum of zero-extended bytes 2i and 2i+1 of V16QI
;; operand 1.
10557 (define_insn "xop_phaddubw"
10558 [(set (match_operand:V8HI 0 "register_operand" "=x")
10559 (plus:V8HI
10560 (zero_extend:V8HI
10561 (vec_select:V8QI
10562 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10563 (parallel [(const_int 0)
10564 (const_int 2)
10565 (const_int 4)
10566 (const_int 6)
10567 (const_int 8)
10568 (const_int 10)
10569 (const_int 12)
10570 (const_int 14)])))
10571 (zero_extend:V8HI
10572 (vec_select:V8QI
10573 (match_dup 1)
10574 (parallel [(const_int 1)
10575 (const_int 3)
10576 (const_int 5)
10577 (const_int 7)
10578 (const_int 9)
10579 (const_int 11)
10580 (const_int 13)
10581 (const_int 15)])))))]
10582 "TARGET_XOP"
10583 "vphaddubw\t{%1, %0|%0, %1}"
10584 [(set_attr "type" "sseiadd1")])
10585
;; Unsigned horizontal add, bytes to doublewords: result element i of the
;; V4SI destination is the sum of zero-extended bytes 4i .. 4i+3 of V16QI
;; operand 1.  Each vec_select picks one byte per result element.
10586 (define_insn "xop_phaddubd"
10587 [(set (match_operand:V4SI 0 "register_operand" "=x")
10588 (plus:V4SI
10589 (plus:V4SI
10590 (zero_extend:V4SI
10591 (vec_select:V4QI
10592 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10593 (parallel [(const_int 0)
10594 (const_int 4)
10595 (const_int 8)
10596 (const_int 12)])))
10597 (zero_extend:V4SI
10598 (vec_select:V4QI
10599 (match_dup 1)
10600 (parallel [(const_int 1)
10601 (const_int 5)
10602 (const_int 9)
10603 (const_int 13)]))))
10604 (plus:V4SI
10605 (zero_extend:V4SI
10606 (vec_select:V4QI
10607 (match_dup 1)
10608 (parallel [(const_int 2)
10609 (const_int 6)
10610 (const_int 10)
10611 (const_int 14)])))
10612 (zero_extend:V4SI
10613 (vec_select:V4QI
10614 (match_dup 1)
10615 (parallel [(const_int 3)
10616 (const_int 7)
10617 (const_int 11)
10618 (const_int 15)]))))))]
10619 "TARGET_XOP"
10620 "vphaddubd\t{%1, %0|%0, %1}"
10621 [(set_attr "type" "sseiadd1")])
10622
;; Unsigned horizontal add, bytes to quadwords: result element 0 of the
;; V2DI destination is the sum of zero-extended bytes 0..7 of V16QI
;; operand 1, and result element 1 is the sum of zero-extended bytes 8..15.
;; Every term is zero_extend: this is the unsigned variant, so no term may
;; be sign-extended.
;; Each vec_select contributes one byte to each result element, so the
;; k-th select (k = 0..7) must pick bytes [k, k+8].
10623 (define_insn "xop_phaddubq"
10624 [(set (match_operand:V2DI 0 "register_operand" "=x")
10625 (plus:V2DI
10626 (plus:V2DI
10627 (plus:V2DI
10628 (zero_extend:V2DI
10629 (vec_select:V2QI
10630 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10631 (parallel [(const_int 0)
10632 (const_int 8)])))
10633 (zero_extend:V2DI
10634 (vec_select:V2QI
10635 (match_dup 1)
10636 (parallel [(const_int 1)
10637 (const_int 9)]))))
10638 (plus:V2DI
10639 (zero_extend:V2DI
10640 (vec_select:V2QI
10641 (match_dup 1)
10642 (parallel [(const_int 2)
10643 (const_int 10)])))
10644 (zero_extend:V2DI
10645 (vec_select:V2QI
10646 (match_dup 1)
10647 (parallel [(const_int 3)
10648 (const_int 11)])))))
10649 (plus:V2DI
10650 (plus:V2DI
10651 (zero_extend:V2DI
10652 (vec_select:V2QI
10653 (match_dup 1)
10654 (parallel [(const_int 4)
10655 (const_int 12)])))
10656 (zero_extend:V2DI
10657 (vec_select:V2QI
10658 (match_dup 1)
10659 (parallel [(const_int 5)
10660 (const_int 13)]))))
10661 (plus:V2DI
10662 (zero_extend:V2DI
10663 (vec_select:V2QI
10664 (match_dup 1)
10665 (parallel [(const_int 6)
10666 (const_int 14)])))
10667 (zero_extend:V2DI
10668 (vec_select:V2QI
10669 (match_dup 1)
10670 (parallel [(const_int 7)
10671 (const_int 15)])))))))]
10672 "TARGET_XOP"
10673 "vphaddubq\t{%1, %0|%0, %1}"
10674 [(set_attr "type" "sseiadd1")])
10675
;; Unsigned horizontal add, words to doublewords: result element i of the
;; V4SI destination is the sum of zero-extended words 2i and 2i+1 of V8HI
;; operand 1.
10676 (define_insn "xop_phadduwd"
10677 [(set (match_operand:V4SI 0 "register_operand" "=x")
10678 (plus:V4SI
10679 (zero_extend:V4SI
10680 (vec_select:V4HI
10681 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10682 (parallel [(const_int 0)
10683 (const_int 2)
10684 (const_int 4)
10685 (const_int 6)])))
10686 (zero_extend:V4SI
10687 (vec_select:V4HI
10688 (match_dup 1)
10689 (parallel [(const_int 1)
10690 (const_int 3)
10691 (const_int 5)
10692 (const_int 7)])))))]
10693 "TARGET_XOP"
10694 "vphadduwd\t{%1, %0|%0, %1}"
10695 [(set_attr "type" "sseiadd1")])
10696
;; Unsigned horizontal add, words to quadwords: result element i of the
;; V2DI destination is the sum of zero-extended words 4i .. 4i+3 of V8HI
;; operand 1.  Each vec_select picks one word per result element.
10697 (define_insn "xop_phadduwq"
10698 [(set (match_operand:V2DI 0 "register_operand" "=x")
10699 (plus:V2DI
10700 (plus:V2DI
10701 (zero_extend:V2DI
10702 (vec_select:V2HI
10703 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10704 (parallel [(const_int 0)
10705 (const_int 4)])))
10706 (zero_extend:V2DI
10707 (vec_select:V2HI
10708 (match_dup 1)
10709 (parallel [(const_int 1)
10710 (const_int 5)]))))
10711 (plus:V2DI
10712 (zero_extend:V2DI
10713 (vec_select:V2HI
10714 (match_dup 1)
10715 (parallel [(const_int 2)
10716 (const_int 6)])))
10717 (zero_extend:V2DI
10718 (vec_select:V2HI
10719 (match_dup 1)
10720 (parallel [(const_int 3)
10721 (const_int 7)]))))))]
10722 "TARGET_XOP"
10723 "vphadduwq\t{%1, %0|%0, %1}"
10724 [(set_attr "type" "sseiadd1")])
10725
;; Unsigned horizontal add, doublewords to quadwords: result element i of
;; the V2DI destination is the sum of zero-extended doublewords 2i and
;; 2i+1 of V4SI operand 1.
10726 (define_insn "xop_phaddudq"
10727 [(set (match_operand:V2DI 0 "register_operand" "=x")
10728 (plus:V2DI
10729 (zero_extend:V2DI
10730 (vec_select:V2SI
10731 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10732 (parallel [(const_int 0)
10733 (const_int 2)])))
10734 (zero_extend:V2DI
10735 (vec_select:V2SI
10736 (match_dup 1)
10737 (parallel [(const_int 1)
10738 (const_int 3)])))))]
10739 "TARGET_XOP"
10740 "vphaddudq\t{%1, %0|%0, %1}"
10741 [(set_attr "type" "sseiadd1")])
10742
;; Signed horizontal subtract, bytes to words: result element i of the
;; V8HI destination is sign-extended byte 2i minus sign-extended byte 2i+1
;; of V16QI operand 1 (even-indexed element is the minuend).
10743 (define_insn "xop_phsubbw"
10744 [(set (match_operand:V8HI 0 "register_operand" "=x")
10745 (minus:V8HI
10746 (sign_extend:V8HI
10747 (vec_select:V8QI
10748 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10749 (parallel [(const_int 0)
10750 (const_int 2)
10751 (const_int 4)
10752 (const_int 6)
10753 (const_int 8)
10754 (const_int 10)
10755 (const_int 12)
10756 (const_int 14)])))
10757 (sign_extend:V8HI
10758 (vec_select:V8QI
10759 (match_dup 1)
10760 (parallel [(const_int 1)
10761 (const_int 3)
10762 (const_int 5)
10763 (const_int 7)
10764 (const_int 9)
10765 (const_int 11)
10766 (const_int 13)
10767 (const_int 15)])))))]
10768 "TARGET_XOP"
10769 "vphsubbw\t{%1, %0|%0, %1}"
10770 [(set_attr "type" "sseiadd1")])
10771
;; Signed horizontal subtract, words to doublewords: result element i of
;; the V4SI destination is sign-extended word 2i minus sign-extended word
;; 2i+1 of V8HI operand 1 (even-indexed element is the minuend).
10772 (define_insn "xop_phsubwd"
10773 [(set (match_operand:V4SI 0 "register_operand" "=x")
10774 (minus:V4SI
10775 (sign_extend:V4SI
10776 (vec_select:V4HI
10777 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10778 (parallel [(const_int 0)
10779 (const_int 2)
10780 (const_int 4)
10781 (const_int 6)])))
10782 (sign_extend:V4SI
10783 (vec_select:V4HI
10784 (match_dup 1)
10785 (parallel [(const_int 1)
10786 (const_int 3)
10787 (const_int 5)
10788 (const_int 7)])))))]
10789 "TARGET_XOP"
10790 "vphsubwd\t{%1, %0|%0, %1}"
10791 [(set_attr "type" "sseiadd1")])
10792
;; Signed horizontal subtract, doublewords to quadwords: result element i
;; of the V2DI destination is sign-extended doubleword 2i minus
;; sign-extended doubleword 2i+1 of V4SI operand 1.
10793 (define_insn "xop_phsubdq"
10794 [(set (match_operand:V2DI 0 "register_operand" "=x")
10795 (minus:V2DI
10796 (sign_extend:V2DI
10797 (vec_select:V2SI
10798 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10799 (parallel [(const_int 0)
10800 (const_int 2)])))
10801 (sign_extend:V2DI
10802 (vec_select:V2SI
10803 (match_dup 1)
10804 (parallel [(const_int 1)
10805 (const_int 3)])))))]
10806 "TARGET_XOP"
10807 "vphsubdq\t{%1, %0|%0, %1}"
10808 [(set_attr "type" "sseiadd1")])
10809
10810 ;; XOP permute instructions
;; Generic byte permute, modelled as UNSPEC_XOP_PERMUTE over three V16QI
;; operands.  Operand 1 must be a register; either operand 2 or operand 3
;; may be memory, but the insn condition (and the two alternatives) reject
;; the case where both are.
10811 (define_insn "xop_pperm"
10812 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10813 (unspec:V16QI
10814 [(match_operand:V16QI 1 "register_operand" "x,x")
10815 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10816 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
10817 UNSPEC_XOP_PERMUTE))]
10818 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10819 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10820 [(set_attr "type" "sse4arg")
10821 (set_attr "mode" "TI")])
10822
10823 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI vectors into one V4SI: the result is the concatenation of
;; the truncations of operand 1 and operand 2.  Operand 3 (V16QI) only
;; appears in a `use'; it is passed to vpperm as the final source operand.
;; NOTE(review): operand 3 is presumably the byte-selector vector prepared
;; by the caller so that vpperm performs this truncation -- confirm at the
;; call sites.  Operands 2 and 3 may not both be memory.
10824 (define_insn "xop_pperm_pack_v2di_v4si"
10825 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10826 (vec_concat:V4SI
10827 (truncate:V2SI
10828 (match_operand:V2DI 1 "register_operand" "x,x"))
10829 (truncate:V2SI
10830 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
10831 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10832 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10833 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10834 [(set_attr "type" "sse4arg")
10835 (set_attr "mode" "TI")])
10836
;; Pack two V4SI vectors into one V8HI: the result is the concatenation of
;; the truncations of operand 1 and operand 2.  Operand 3 (V16QI) only
;; appears in a `use'; it is passed to vpperm as the final source operand.
;; NOTE(review): operand 3 is presumably the byte-selector vector prepared
;; by the caller so that vpperm performs this truncation -- confirm at the
;; call sites.  Operands 2 and 3 may not both be memory.
10837 (define_insn "xop_pperm_pack_v4si_v8hi"
10838 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10839 (vec_concat:V8HI
10840 (truncate:V4HI
10841 (match_operand:V4SI 1 "register_operand" "x,x"))
10842 (truncate:V4HI
10843 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
10844 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10845 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10846 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10847 [(set_attr "type" "sse4arg")
10848 (set_attr "mode" "TI")])
10849
;; Pack two V8HI vectors into one V16QI: the result is the concatenation
;; of the truncations of operand 1 and operand 2.  Operand 3 (V16QI) only
;; appears in a `use'; it is passed to vpperm as the final source operand.
;; NOTE(review): operand 3 is presumably the byte-selector vector prepared
;; by the caller so that vpperm performs this truncation -- confirm at the
;; call sites.  Operands 2 and 3 may not both be memory.
10850 (define_insn "xop_pperm_pack_v8hi_v16qi"
10851 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10852 (vec_concat:V16QI
10853 (truncate:V8QI
10854 (match_operand:V8HI 1 "register_operand" "x,x"))
10855 (truncate:V8QI
10856 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
10857 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10858 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10859 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10860 [(set_attr "type" "sse4arg")
10861 (set_attr "mode" "TI")])
10862
10863 ;; XOP packed rotate instructions
;; Rotate-left expander for the SSEMODE1248 vector modes with an SImode
;; count in operand 2.
;; If the count satisfies const_0_to_<sserotatemax>_operand, the
;; preparation code does nothing and the rotate RTL in the template is
;; emitted as is.  Otherwise the count is converted to the element mode if
;; needed, replicated into all <ssescalarnum> elements of a fresh vector
;; register via vec_init, and a per-element variable rotate
;; (xop_vrotl<mode>3) is emitted instead.
10864 (define_expand "rotl<mode>3"
10865 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10866 (rotate:SSEMODE1248
10867 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10868 (match_operand:SI 2 "general_operand")))]
10869 "TARGET_XOP"
10870 {
10871 /* If we were given a scalar, convert it to parallel */
10872 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10873 {
10874 rtvec vs = rtvec_alloc (<ssescalarnum>);
10875 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10876 rtx reg = gen_reg_rtx (<MODE>mode);
10877 rtx op2 = operands[2];
10878 int i;
10879
10880 if (GET_MODE (op2) != <ssescalarmode>mode)
10881 {
10882 op2 = gen_reg_rtx (<ssescalarmode>mode);
10883 convert_move (op2, operands[2], false);
10884 }
10885
10886 for (i = 0; i < <ssescalarnum>; i++)
10887 RTVEC_ELT (vs, i) = op2;
10888
10889 emit_insn (gen_vec_init<mode> (reg, par));
10890 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
10891 DONE;
10892 }
10893 })
10894
;; Rotate-right expander for the SSEMODE1248 vector modes with an SImode
;; count in operand 2.
;; If the count satisfies const_0_to_<sserotatemax>_operand, the
;; preparation code does nothing and the rotatert RTL in the template is
;; emitted as is.  Otherwise the count is converted to the element mode if
;; needed, replicated into a vector via vec_init, negated, and passed to
;; xop_vrotl<mode>3, whose pattern rotates right for negative counts.
10895 (define_expand "rotr<mode>3"
10896 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10897 (rotatert:SSEMODE1248
10898 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10899 (match_operand:SI 2 "general_operand")))]
10900 "TARGET_XOP"
10901 {
10902 /* If we were given a scalar, convert it to parallel */
10903 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10904 {
10905 rtvec vs = rtvec_alloc (<ssescalarnum>);
10906 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
10907 rtx neg = gen_reg_rtx (<MODE>mode);
10908 rtx reg = gen_reg_rtx (<MODE>mode);
10909 rtx op2 = operands[2];
10910 int i;
10911
10912 if (GET_MODE (op2) != <ssescalarmode>mode)
10913 {
10914 op2 = gen_reg_rtx (<ssescalarmode>mode);
10915 convert_move (op2, operands[2], false);
10916 }
10917
10918 for (i = 0; i < <ssescalarnum>; i++)
10919 RTVEC_ELT (vs, i) = op2;
10920
10921 emit_insn (gen_vec_init<mode> (reg, par));
10922 emit_insn (gen_neg<mode>2 (neg, reg));
10923 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
10924 DONE;
10925 }
10926 })
10927
;; Rotate left by an immediate: every element of operand 1 (register or
;; memory) is rotated left by the constant in operand 2, which must
;; satisfy const_0_to_<sserotatemax>_operand.  The count is encoded as a
;; one-byte immediate (length_immediate 1).
10928 (define_insn "xop_rotl<mode>3"
10929 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10930 (rotate:SSEMODE1248
10931 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
10932 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10933 "TARGET_XOP"
10934 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10935 [(set_attr "type" "sseishft")
10936 (set_attr "length_immediate" "1")
10937 (set_attr "mode" "TI")])
10938
;; Rotate right by an immediate.  The output code emits a left rotate, so
;; the count is rewritten as (element width in bits) - operand 2 and
;; stored in operands[3] for the output template.
;; The element width is taken from GET_MODE_BITSIZE of the scalar element
;; mode.  <ssescalarnum> is the number of elements in the vector (it sizes
;; the vec_init vector in the rotate expanders), so <ssescalarnum> * 8
;; equals the element width only for V4SI; for V2DI it would give 16
;; instead of 64.
10939 (define_insn "xop_rotr<mode>3"
10940 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
10941 (rotatert:SSEMODE1248
10942 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
10943 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
10944 "TARGET_XOP"
10945 {
10946 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
10947 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
10948 }
10949 [(set_attr "type" "sseishft")
10950 (set_attr "length_immediate" "1")
10951 (set_attr "mode" "TI")])
10952
;; Per-element variable rotate right: operand 2 holds one count per
;; element.  The counts are negated into a fresh register and handed to
;; xop_vrotl<mode>3, whose pattern rotates right for negative counts.
10953 (define_expand "vrotr<mode>3"
10954 [(match_operand:SSEMODE1248 0 "register_operand" "")
10955 (match_operand:SSEMODE1248 1 "register_operand" "")
10956 (match_operand:SSEMODE1248 2 "register_operand" "")]
10957 "TARGET_XOP"
10958 {
10959 rtx reg = gen_reg_rtx (<MODE>mode);
10960 emit_insn (gen_neg<mode>2 (reg, operands[2]));
10961 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
10962 DONE;
10963 })
10964
;; Per-element variable rotate left: forwards the three operands unchanged
;; to xop_vrotl<mode>3.
10965 (define_expand "vrotl<mode>3"
10966 [(match_operand:SSEMODE1248 0 "register_operand" "")
10967 (match_operand:SSEMODE1248 1 "register_operand" "")
10968 (match_operand:SSEMODE1248 2 "register_operand" "")]
10969 "TARGET_XOP"
10970 {
10971 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
10972 DONE;
10973 })
10974
;; Per-element variable rotate.  The RTL is an if_then_else on
;; (operand 2 >= 0): when true, operand 1 is rotated left by operand 2;
;; otherwise operand 1 is rotated right by the negation of operand 2.
;; Either operand 1 or operand 2 may be memory, but not both (enforced by
;; the insn condition and by the two alternatives).
10975 (define_insn "xop_vrotl<mode>3"
10976 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
10977 (if_then_else:SSEMODE1248
10978 (ge:SSEMODE1248
10979 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
10980 (const_int 0))
10981 (rotate:SSEMODE1248
10982 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
10983 (match_dup 2))
10984 (rotatert:SSEMODE1248
10985 (match_dup 1)
10986 (neg:SSEMODE1248 (match_dup 2)))))]
10987 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10988 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
10989 [(set_attr "type" "sseishft")
10990 (set_attr "prefix_data16" "0")
10991 (set_attr "prefix_extra" "2")
10992 (set_attr "mode" "TI")])
10993
10994 ;; XOP packed shift instructions.
10995 ;; FIXME: add V2DI back in
;; Per-element variable logical shift right (SSEMODE124 only).  The counts
;; in operand 2 are negated into a fresh register and handed to
;; xop_lshl<mode>3, whose pattern shifts right logically for negative
;; counts.
10996 (define_expand "vlshr<mode>3"
10997 [(match_operand:SSEMODE124 0 "register_operand" "")
10998 (match_operand:SSEMODE124 1 "register_operand" "")
10999 (match_operand:SSEMODE124 2 "register_operand" "")]
11000 "TARGET_XOP"
11001 {
11002 rtx neg = gen_reg_rtx (<MODE>mode);
11003 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11004 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
11005 DONE;
11006 })
11007
;; Per-element variable arithmetic shift right (SSEMODE124 only).  The
;; counts in operand 2 are negated into a fresh register and handed to
;; xop_ashl<mode>3, whose pattern shifts right arithmetically for negative
;; counts.
11008 (define_expand "vashr<mode>3"
11009 [(match_operand:SSEMODE124 0 "register_operand" "")
11010 (match_operand:SSEMODE124 1 "register_operand" "")
11011 (match_operand:SSEMODE124 2 "register_operand" "")]
11012 "TARGET_XOP"
11013 {
11014 rtx neg = gen_reg_rtx (<MODE>mode);
11015 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11016 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
11017 DONE;
11018 })
11019
;; Per-element variable shift left (SSEMODE124 only): forwards the three
;; operands unchanged to xop_ashl<mode>3.
11020 (define_expand "vashl<mode>3"
11021 [(match_operand:SSEMODE124 0 "register_operand" "")
11022 (match_operand:SSEMODE124 1 "register_operand" "")
11023 (match_operand:SSEMODE124 2 "register_operand" "")]
11024 "TARGET_XOP"
11025 {
11026 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
11027 DONE;
11028 })
11029
;; Per-element variable arithmetic shift (vpsha).  Where operand 2 compares
;; >= 0 the result is operand 1 shifted left by operand 2; elsewhere it is
;; operand 1 shifted right arithmetically by the negated count.  The insn
;; condition rejects the case where operands 1 and 2 are both in memory.
(define_insn "xop_ashl<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
        (if_then_else:SSEMODE1248
          (ge:SSEMODE1248
            (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
            (const_int 0))
          (ashift:SSEMODE1248
            (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
            (match_dup 2))
          (ashiftrt:SSEMODE1248
            (match_dup 1)
            (neg:SSEMODE1248 (match_dup 2)))))]
  "TARGET_XOP && (!MEM_P (operands[1]) || !MEM_P (operands[2]))"
  "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "mode" "TI")])
11048
;; Per-element variable logical shift (vpshl).  Where operand 2 compares
;; >= 0 the result is operand 1 shifted left by operand 2; elsewhere it is
;; operand 1 shifted right logically by the negated count.  The insn
;; condition rejects the case where operands 1 and 2 are both in memory.
(define_insn "xop_lshl<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
        (if_then_else:SSEMODE1248
          (ge:SSEMODE1248
            (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
            (const_int 0))
          (ashift:SSEMODE1248
            (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
            (match_dup 2))
          (lshiftrt:SSEMODE1248
            (match_dup 1)
            (neg:SSEMODE1248 (match_dup 2)))))]
  "TARGET_XOP && (!MEM_P (operands[1]) || !MEM_P (operands[2]))"
  "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "mode" "TI")])
11067
11068 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI left shift by a single scalar count (operand 2, SImode).  The count
;; is replicated into all 16 elements of a vector and passed to
;; xop_ashlv16qi3.
(define_expand "ashlv16qi3"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")
   (match_operand:SI 2 "nonmemory_operand" "")]
  "TARGET_XOP"
{
  rtvec elts = rtvec_alloc (16);
  rtx init = gen_rtx_PARALLEL (V16QImode, elts);
  rtx counts = gen_reg_rtx (V16QImode);
  int lane;

  /* Every lane of the count vector gets the same scalar count.  */
  for (lane = 16; lane-- > 0; )
    RTVEC_ELT (elts, lane) = operands[2];

  emit_insn (gen_vec_initv16qi (counts, init));
  emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], counts));
  DONE;
})
11086
;; V16QI shift by a single scalar count (operand 2, SImode) through
;; xop_lshlv16qi3.  The count is replicated into all 16 elements of a vector
;; and passed on without negation.
(define_expand "lshlv16qi3"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")
   (match_operand:SI 2 "nonmemory_operand" "")]
  "TARGET_XOP"
{
  rtvec elts = rtvec_alloc (16);
  rtx init = gen_rtx_PARALLEL (V16QImode, elts);
  rtx counts = gen_reg_rtx (V16QImode);
  int lane;

  /* Every lane of the count vector gets the same scalar count.  */
  for (lane = 16; lane-- > 0; )
    RTVEC_ELT (elts, lane) = operands[2];

  emit_insn (gen_vec_initv16qi (counts, init));
  emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], counts));
  DONE;
})
11104
;; V16QI arithmetic right shift by a single scalar count (operand 2).
;; Implemented with xop_ashlv16qi3 and a negated count vector: a constant
;; count is negated at expand time, a variable count is broadcast first and
;; the resulting vector is negated with negv16qi2.
(define_expand "ashrv16qi3"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")
   (match_operand:SI 2 "nonmemory_operand" "")]
  "TARGET_XOP"
{
  rtvec elts = rtvec_alloc (16);
  rtx init = gen_rtx_PARALLEL (V16QImode, elts);
  rtx counts = gen_reg_rtx (V16QImode);
  rtx shift = counts;
  rtx elt = operands[2];
  int lane;

  /* Fold the negation into the constant when the count is known.  */
  if (CONST_INT_P (operands[2]))
    elt = GEN_INT (- INTVAL (operands[2]));

  for (lane = 0; lane < 16; ++lane)
    RTVEC_ELT (elts, lane) = elt;

  emit_insn (gen_vec_initv16qi (counts, init));

  /* A variable count still has to be negated, as a vector.  */
  if (!CONST_INT_P (operands[2]))
    {
      shift = gen_reg_rtx (V16QImode);
      emit_insn (gen_negv16qi2 (shift, counts));
    }

  emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], shift));
  DONE;
})
11135
;; V2DI arithmetic right shift by a single scalar count (operand 2).
;; The negated count is placed in both elements of a V2DI vector, which is
;; then passed to xop_ashlv2di3.
(define_expand "ashrv2di3"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V2DI 1 "register_operand" "")
   (match_operand:DI 2 "nonmemory_operand" "")]
  "TARGET_XOP"
{
  rtvec elts = rtvec_alloc (2);
  rtx init = gen_rtx_PARALLEL (V2DImode, elts);
  rtx counts = gen_reg_rtx (V2DImode);
  rtx neg_count;

  if (CONST_INT_P (operands[2]))
    /* Constant count: negate it at expand time.  */
    neg_count = GEN_INT (- INTVAL (operands[2]));
  else
    {
      rtx count = operands[2];

      /* A count that is not already DImode is converted (signed) into a
         fresh DImode register before being negated.  */
      if (GET_MODE (count) != DImode)
        count = gen_reg_rtx (DImode);
      neg_count = gen_reg_rtx (DImode);
      if (count != operands[2])
        convert_move (count, operands[2], false);
      emit_insn (gen_negdi2 (neg_count, count));
    }

  RTVEC_ELT (elts, 0) = neg_count;
  RTVEC_ELT (elts, 1) = neg_count;
  emit_insn (gen_vec_initv2di (counts, init));
  emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], counts));
  DONE;
})
11168
11169 ;; XOP FRCZ support
11170 ;; parallel insns
;; Packed vfrcz on a whole SSEMODEF2P vector; the source may be a register
;; or memory.  Modelled as UNSPEC_FRCZ.
(define_insn "xop_frcz<mode>2"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (unspec:SSEMODEF2P
          [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
          UNSPEC_FRCZ))]
  "TARGET_XOP"
  "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt1")
   (set_attr "mode" "<MODE>")])
11180
11181 ;; scalar insns
;; Scalar vfrcz: element 0 of the result comes from UNSPEC_FRCZ of operand 2
;; (vec_merge mask 1); the remaining elements come from operand 1, which is
;; tied to the destination.
(define_insn "xop_vmfrcz<mode>2"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (vec_merge:SSEMODEF2P
          (unspec:SSEMODEF2P
            [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
            UNSPEC_FRCZ)
          (match_operand:SSEMODEF2P 1 "register_operand" "0")
          (const_int 1)))]
  "TARGET_XOP"
  "vfrcz<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt1")
   (set_attr "mode" "<MODE>")])
11194
;; Packed vfrcz for the FMA4MODEF4 vector modes; same shape as
;; xop_frcz<mode>2 but over a different mode iterator.
(define_insn "xop_frcz<mode>2256"
  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x")
        (unspec:FMA4MODEF4
          [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")]
          UNSPEC_FRCZ))]
  "TARGET_XOP"
  "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt1")
   (set_attr "mode" "<MODE>")])
11204
;; Integer vector compare producing a mask (vpcom<cond>).  Operand 1 is the
;; comparison operator, accepted by ix86_comparison_int_operator; %Y1 prints
;; its condition suffix.
(define_insn "xop_maskcmp<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
        (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
          [(match_operand:SSEMODE1248 2 "register_operand" "x")
           (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
  "TARGET_XOP"
  "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11218
;; Unsigned integer vector compare producing a mask (vpcom<cond>u).
;; Operand 1 is the comparison operator, accepted by
;; ix86_comparison_uns_operator.
(define_insn "xop_maskcmp_uns<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
        (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
          [(match_operand:SSEMODE1248 2 "register_operand" "x")
           (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
  "TARGET_XOP"
  "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11232
11233 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11234 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11235 ;; the exact instruction generated for the intrinsic.
;; Same output as xop_maskcmp_uns<mode>3, but the comparison is wrapped in
;; UNSPEC_XOP_UNSIGNED_CMP so that it stays opaque to the optimizers.
(define_insn "xop_maskcmp_uns2<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
        (unspec:SSEMODE1248
          [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
             [(match_operand:SSEMODE1248 2 "register_operand" "x")
              (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
          UNSPEC_XOP_UNSIGNED_CMP))]
  "TARGET_XOP"
  "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11250
11251 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11252 ;; being added here to be complete.
;; vpcomtrue / vpcomfalse.  Operand 3 is a compile-time constant: nonzero
;; selects vpcomtrue, zero selects vpcomfalse.
(define_insn "xop_pcom_tf<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
        (unspec:SSEMODE1248
          [(match_operand:SSEMODE1248 1 "register_operand" "x")
           (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_int_operand" "n")]
          UNSPEC_XOP_TRUEFALSE))]
  "TARGET_XOP"
{
  if (INTVAL (operands[3]) == 0)
    return "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";

  return "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11271
;; Two-source permute (vpermil2ps / vpermil2pd).  Operands 1 and 2 are the
;; data sources, operand 3 is the selector vector in the integer mode
;; <avxpermvecmode>, and operand 4 is a 2-bit immediate.
;;
;; Operand 2 must not carry the '%' (commutative) modifier: that would let
;; it be exchanged with operand 3, which is the selector and has a different
;; mode, so the exchange would change the result.
(define_insn "xop_vpermil2<mode>3"
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
        (unspec:AVXMODEF2P
          [(match_operand:AVXMODEF2P 1 "register_operand" "x")
           (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "x")
           (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm")
           (match_operand:SI 4 "const_0_to_3_operand" "n")]
          UNSPEC_VPERMIL2))]
  "TARGET_XOP"
  "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
  [(set_attr "type" "sse4arg")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<MODE>")])
11285
11286 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded vaesenc: three-operand form with a separate destination.
;; Requires both TARGET_AES and TARGET_AVX.
(define_insn "*avx_aesenc"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESENC))]
  "TARGET_AES && TARGET_AVX"
  "vaesenc\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
11298
;; Non-VEX aesenc: two-operand form, operand 1 is tied to the destination.
(define_insn "aesenc"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESENC))]
  "TARGET_AES"
  "aesenc\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
11309
;; VEX-encoded vaesenclast: three-operand form with a separate destination.
;; Requires both TARGET_AES and TARGET_AVX.
(define_insn "*avx_aesenclast"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESENCLAST))]
  "TARGET_AES && TARGET_AVX"
  "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
11321
;; Non-VEX aesenclast: two-operand form, operand 1 is tied to the
;; destination.
(define_insn "aesenclast"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESENCLAST))]
  "TARGET_AES"
  "aesenclast\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
11332
;; VEX-encoded vaesdec: three-operand form with a separate destination.
;; Requires both TARGET_AES and TARGET_AVX.
(define_insn "*avx_aesdec"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESDEC))]
  "TARGET_AES && TARGET_AVX"
  "vaesdec\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
11344
;; Non-VEX aesdec: two-operand form, operand 1 is tied to the destination.
(define_insn "aesdec"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESDEC))]
  "TARGET_AES"
  "aesdec\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
11355
;; VEX-encoded vaesdeclast: three-operand form with a separate destination.
;; Requires both TARGET_AES and TARGET_AVX.
(define_insn "*avx_aesdeclast"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESDECLAST))]
  "TARGET_AES && TARGET_AVX"
  "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
11367
;; Non-VEX aesdeclast: two-operand form, operand 1 is tied to the
;; destination.
(define_insn "aesdeclast"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESDECLAST))]
  "TARGET_AES"
  "aesdeclast\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
11378
;; aesimc with a single register-or-memory source.  One pattern serves both
;; encodings: the template starts with %v and the prefix attribute is
;; maybe_vex.
(define_insn "aesimc"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
          UNSPEC_AESIMC))]
  "TARGET_AES"
  "%vaesimc\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
11389
;; aeskeygenassist with a register-or-memory source and an 8-bit immediate
;; (operand 2).  One pattern serves both encodings: the template starts
;; with %v and the prefix attribute is maybe_vex.
(define_insn "aeskeygenassist"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
           (match_operand:SI 2 "const_0_to_255_operand" "n")]
          UNSPEC_AESKEYGENASSIST))]
  "TARGET_AES"
  "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
11402
;; VEX-encoded vpclmulqdq: three-operand form plus an 8-bit immediate
;; (operand 3).  Requires both TARGET_PCLMUL and TARGET_AVX.
(define_insn "*vpclmulqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_PCLMUL))]
  "TARGET_PCLMUL && TARGET_AVX"
  "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
11416
;; Non-VEX pclmulqdq: operand 1 is tied to the destination; operand 3 is an
;; 8-bit immediate.
(define_insn "pclmulqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_PCLMUL))]
  "TARGET_PCLMUL"
  "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11429
;; Build the PARALLEL for vzeroall: element 0 is the UNSPECV_VZEROALL
;; marker, followed by one (set reg 0) per SSE register in V8SImode
;; (16 registers for TARGET_64BIT, otherwise 8).
(define_expand "avx_vzeroall"
  [(match_par_dup 0 [(const_int 0)])]
  "TARGET_AVX"
{
  const int nregs = TARGET_64BIT ? 16 : 8;
  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
  int regno, slot;

  XVECEXP (par, 0, 0)
    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
                               UNSPECV_VZEROALL);

  /* Slot 0 holds the marker, so register N lives in slot N + 1.  */
  for (regno = 0, slot = 1; regno < nregs; regno++, slot++)
    {
      rtx reg = gen_rtx_REG (V8SImode, SSE_REGNO (regno));

      XVECEXP (par, 0, slot)
        = gen_rtx_SET (VOIDmode, reg, CONST0_RTX (V8SImode));
    }

  operands[0] = par;
})
11449
;; Matches a PARALLEL accepted by vzeroall_operation whose first element is
;; the UNSPECV_VZEROALL marker, and emits a single vzeroall.
(define_insn "*avx_vzeroall"
  [(match_parallel 0 "vzeroall_operation"
     [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
  "TARGET_AVX"
  "vzeroall"
  [(set_attr "type" "sse")
   (set_attr "modrm" "0")
   (set_attr "memory" "none")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
11460
11461 ;; vzeroupper clobbers the upper 128bits of AVX registers.
;; Build the PARALLEL for vzeroupper: element 0 is the UNSPECV_VZEROUPPER
;; marker, followed by one CLOBBER per SSE register in V8SImode
;; (16 registers for TARGET_64BIT, otherwise 8).
(define_expand "avx_vzeroupper"
  [(match_par_dup 0 [(const_int 0)])]
  "TARGET_AVX"
{
  const int nregs = TARGET_64BIT ? 16 : 8;
  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
  int regno, slot;

  XVECEXP (par, 0, 0)
    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
                               UNSPECV_VZEROUPPER);

  /* Slot 0 holds the marker, so register N lives in slot N + 1.  */
  for (regno = 0, slot = 1; regno < nregs; regno++, slot++)
    {
      rtx reg = gen_rtx_REG (V8SImode, SSE_REGNO (regno));

      XVECEXP (par, 0, slot) = gen_rtx_CLOBBER (VOIDmode, reg);
    }

  operands[0] = par;
})
11480
;; Matches a PARALLEL accepted by vzeroupper_operation whose first element
;; is the UNSPECV_VZEROUPPER marker, and emits a single vzeroupper.
(define_insn "*avx_vzeroupper"
  [(match_parallel 0 "vzeroupper_operation"
     [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
  "TARGET_AVX"
  "vzeroupper"
  [(set_attr "type" "sse")
   (set_attr "modrm" "0")
   (set_attr "memory" "none")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
11491
;; Broadcast a scalar into every element of a 256-bit vector.
;;   alternative 0: memory source, a single vbroadcast.
;;   alternative 1: register source, emitted as "#" and split after reload
;;     into a half-width vec_duplicate followed by a vec_concat of that
;;     half with itself.
(define_insn_and_split "vec_dup<mode>"
  [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
        (vec_duplicate:AVX256MODE24P
          (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
  "TARGET_AVX"
  "@
   vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
   #"
  "&& reload_completed && REG_P (operands[1])"
  [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
   (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
{
  /* The intermediate is the destination's own hard register, viewed in
     the half-width vector mode.  */
  operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11508
;; Duplicate a 128-bit half-vector into both halves of a 256-bit register.
;;   alternative 0: memory source, vbroadcastf128.
;;   alternative 1: source tied to the destination, vinsertf128 with
;;     immediate 1.
;;   alternative 2: any other register, vperm2f128 with immediate 0 on the
;;     source printed through %t.
(define_insn "avx_vbroadcastf128_<mode>"
  [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
        (vec_concat:AVX256MODE
          (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
          (match_dup 1)))]
  "TARGET_AVX"
  "@
   vbroadcastf128\t{%1, %0|%0, %1}
   vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
   vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
  [(set_attr "type" "ssemov,sselog1,sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "0,1,1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF,V8SF,V8SF")])
11524
11525 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11526 ;; If it so happens that the input is in memory, use vbroadcast.
11527 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast expressed as a vec_select.  Operand 3 is the index of the
;; element being replicated.
;;   alternatives 0 and 1 (memory source): vbroadcastss from the address of
;;     the selected element.
;;   alternative 2 (register source): vpermilps with immediate elt * 0x55.
(define_insn "*avx_vperm_broadcast_v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
        (vec_select:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
          (match_parallel 2 "avx_vbroadcast_operand"
            [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
{
  const int elt = INTVAL (operands[3]);

  if (which_alternative == 0 || which_alternative == 1)
    {
      /* Each SFmode element is 4 bytes wide.  */
      operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
      return "vbroadcastss\t{%1, %0|%0, %1}";
    }

  if (which_alternative == 2)
    {
      /* 0x55 repeats the 2-bit selector in all four fields.  */
      operands[2] = GEN_INT (elt * 0x55);
      return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
    }

  gcc_unreachable ();
}
  [(set_attr "type" "ssemov,ssemov,sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "0,0,1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "SF,SF,V4SF")])
11555
;; 256-bit broadcast expressed as a vec_select; always split after reload.
;; Operand 3 is the index of the element being replicated.
;;   memory source: operand 1 is re-addressed to the selected scalar and
;;     the replacement vec_duplicate pattern is used.
;;   register source: vpermil within each 128-bit lane, then vperm2f128 to
;;     copy the wanted lane into both halves.
(define_insn_and_split "*avx_vperm_broadcast_<mode>"
  [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
        (vec_select:AVX256MODEF2P
          (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
          (match_parallel 2 "avx_vbroadcast_operand"
            [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
{
  rtx dst = operands[0];
  rtx src = operands[1];
  const int elt = INTVAL (operands[3]);

  if (!REG_P (src))
    /* Fall through to the vec_duplicate template with operand 1 now
       referring to the selected scalar in memory.  */
    operands[1] = adjust_address_nv (src, <avxscalarmode>mode,
                                     elt * GET_MODE_SIZE (<avxscalarmode>mode));
  else
    {
      int in_lane_sel, lane_sel;

      /* Replicate the wanted element across its own 128-bit lane.  The
         other lane is shuffled the same way; its contents are unused.  */
      if (<MODE>mode == V4DFmode)
        in_lane_sel = (elt & 1) ? 15 : 0;
      else
        in_lane_sel = (elt & 3) * 0x55;
      emit_insn (gen_avx_vpermil<mode> (dst, src, GEN_INT (in_lane_sel)));

      /* Copy the lane that held the element into both lanes of DST.  */
      lane_sel = (elt / (<ssescalarnum> / 2)) * 0x11;
      emit_insn (gen_avx_vperm2f128<mode>3 (dst, dst, dst,
                                            GEN_INT (lane_sel)));
      DONE;
    }
})
11591
;; vpermilpd with an immediate (AVXMODEFDP modes).  The immediate in
;; operand 2 is rewritten into an explicit vec_select PARALLEL.
(define_expand "avx_vpermil<mode>"
  [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
        (vec_select:AVXMODEFDP
          (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
          (match_operand:SI 2 "const_0_to_255_operand" "")))]
  "TARGET_AVX"
{
  const int imm = INTVAL (operands[2]);
  const int nelt = <ssescalarnum>;
  rtx sel[<ssescalarnum>];
  int k;

  /* Bit K of the immediate picks the low or high element of the 128-bit
     lane holding element K; (K & ~1) is the index of that lane's first
     element (0 for elements 0-1, 2 for elements 2-3).  */
  for (k = 0; k < nelt; k++)
    sel[k] = GEN_INT (((imm >> k) & 1) + (k & ~1));

  operands[2] = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, sel));
})
11613
;; vpermilps with an immediate (AVXMODEFSP modes).  The immediate in
;; operand 2 is rewritten into an explicit vec_select PARALLEL.
(define_expand "avx_vpermil<mode>"
  [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
        (vec_select:AVXMODEFSP
          (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
          (match_operand:SI 2 "const_0_to_255_operand" "")))]
  "TARGET_AVX"
{
  const int imm = INTVAL (operands[2]);
  const int nelt = <ssescalarnum>;
  rtx sel[<ssescalarnum>];
  int k;

  /* Each 2-bit field of the immediate selects one of the four elements of
     a 128-bit lane.  Elements 4-7 reuse the same four fields with the
     index offset by 4, which is what (K & 4) adds.  */
  for (k = 0; k < nelt; k++)
    sel[k] = GEN_INT (((imm >> (2 * (k & 3))) & 3) + (k & 4));

  operands[2] = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, sel));
})
11639
;; Immediate-form vpermilps / vpermilpd matched from a vec_select whose
;; PARALLEL is accepted by avx_vpermilp_<mode>_operand.  The immediate is
;; recovered at output time as avx_vpermilp_parallel (...) - 1.
(define_insn "*avx_vpermilp<mode>"
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
        (vec_select:AVXMODEF2P
          (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
          (match_parallel 2 "avx_vpermilp_<mode>_operand"
            [(match_operand 3 "const_int_operand" "")])))]
  "TARGET_AVX"
{
  /* Operand 2 is replaced by the immediate so that %2 prints it.  */
  operands[2]
    = GEN_INT (avx_vpermilp_parallel (operands[2], <MODE>mode) - 1);
  return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
11657
;; Variable-selector vpermilps / vpermilpd: operand 2 is the selector
;; vector in the integer mode <avxpermvecmode>.  Modelled as UNSPEC_VPERMIL.
(define_insn "avx_vpermilvar<mode>3"
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
        (unspec:AVXMODEF2P
          [(match_operand:AVXMODEF2P 1 "register_operand" "x")
           (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
          UNSPEC_VPERMIL))]
  "TARGET_AVX"
  "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
11670
;; vperm2f128 with an immediate.  When neither bit 3 nor bit 7 of the
;; immediate is set, the operation is emitted as a vec_select from the
;; vec_concat of operands 1 and 2; otherwise the UNSPEC_VPERMIL2F128
;; pattern below is used unchanged.
(define_expand "avx_vperm2f128<mode>3"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
        (unspec:AVX256MODE2P
          [(match_operand:AVX256MODE2P 1 "register_operand" "")
           (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
           (match_operand:SI 3 "const_0_to_255_operand" "")]
          UNSPEC_VPERMIL2F128))]
  "TARGET_AVX"
{
  const int imm = INTVAL (operands[3]);

  if (!(imm & 0x88))
    {
      const int nelt = <ssescalarnum>;
      const int half = nelt / 2;
      /* First source element for the low and the high result half.  */
      const int lo_base = (imm & 3) * half;
      const int hi_base = ((imm >> 4) & 3) * half;
      rtx sel[<ssescalarnum>];
      rtx pair, order, pick;
      int k;

      for (k = 0; k < half; k++)
        {
          sel[k] = GEN_INT (lo_base + k);
          sel[half + k] = GEN_INT (hi_base + k);
        }

      pair = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
                                 operands[1], operands[2]);
      order = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, sel));
      pick = gen_rtx_VEC_SELECT (<MODE>mode, pair, order);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], pick));
      DONE;
    }
})
11703
11704 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11705 ;; means that in order to represent this properly in rtl we'd have to
11706 ;; nest *another* vec_concat with a zero operand and do the select from
11707 ;; a 4x wide vector. That doesn't seem very nice.
;; vperm2f128 kept as UNSPEC_VPERMIL2F128 with the full 8-bit immediate
;; (operand 3) printed as given.
(define_insn "*avx_vperm2f128<mode>_full"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
        (unspec:AVX256MODE2P
          [(match_operand:AVX256MODE2P 1 "register_operand" "x")
           (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_VPERMIL2F128))]
  "TARGET_AVX"
  "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11722
;; vperm2f128 matched from a vec_select over the vec_concat of operands 1
;; and 2, with a PARALLEL accepted by avx_vperm2f128_<mode>_operand.  The
;; immediate is recovered at output time as
;; avx_vperm2f128_parallel (...) - 1.
(define_insn "*avx_vperm2f128<mode>_nozero"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
        (vec_select:AVX256MODE2P
          (vec_concat:<ssedoublesizemode>
            (match_operand:AVX256MODE2P 1 "register_operand" "x")
            (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
          (match_parallel 3 "avx_vperm2f128_<mode>_operand"
            [(match_operand 4 "const_int_operand" "")])))]
  "TARGET_AVX"
{
  /* Operand 3 is replaced by the immediate so that %3 prints it.  */
  operands[3]
    = GEN_INT (avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1);
  return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11742
;; Insert the 128-bit operand 2 into operand 1.  Operand 3 (0 or 1) picks
;; the half to replace: 0 dispatches to vec_set_lo_<mode>, 1 to
;; vec_set_hi_<mode>.
(define_expand "avx_vinsertf128<mode>"
  [(match_operand:AVX256MODE 0 "register_operand" "")
   (match_operand:AVX256MODE 1 "register_operand" "")
   (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_0_to_1_operand" "")]
  "TARGET_AVX"
{
  const HOST_WIDE_INT half = INTVAL (operands[3]);

  if (half == 0)
    emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
                                      operands[2]));
  else if (half == 1)
    emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
                                      operands[2]));
  else
    gcc_unreachable ();

  DONE;
})
11765
;; AVX256MODE4P: replace the low half.  The result is operand 2 followed by
;; elements 2-3 of operand 1; emitted as vinsertf128 with immediate 0.
(define_insn "vec_set_lo_<mode>"
  [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
        (vec_concat:AVX256MODE4P
          (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
          (vec_select:<avxhalfvecmode>
            (match_operand:AVX256MODE4P 1 "register_operand" "x")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11780
;; AVX256MODE4P: replace the high half.  The result is elements 0-1 of
;; operand 1 followed by operand 2; emitted as vinsertf128 with immediate 1.
(define_insn "vec_set_hi_<mode>"
  [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
        (vec_concat:AVX256MODE4P
          (vec_select:<avxhalfvecmode>
            (match_operand:AVX256MODE4P 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11795
;; AVX256MODE8P: replace the low half.  The result is operand 2 followed by
;; elements 4-7 of operand 1; emitted as vinsertf128 with immediate 0.
(define_insn "vec_set_lo_<mode>"
  [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
        (vec_concat:AVX256MODE8P
          (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
          (vec_select:<avxhalfvecmode>
            (match_operand:AVX256MODE8P 1 "register_operand" "x")
            (parallel [(const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX"
  "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11811
;; AVX256MODE8P: replace the high half.  The result is elements 0-3 of
;; operand 1 followed by operand 2; emitted as vinsertf128 with immediate 1.
(define_insn "vec_set_hi_<mode>"
  [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
        (vec_concat:AVX256MODE8P
          (vec_select:<avxhalfvecmode>
            (match_operand:AVX256MODE8P 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))
          (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11827
;; V16HI: replace the low half.  The result is the V8HI operand 2 followed
;; by elements 8-15 of operand 1; emitted as vinsertf128 with immediate 0.
(define_insn "vec_set_lo_v16hi"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")
          (vec_select:V8HI
            (match_operand:V16HI 1 "register_operand" "x")
            (parallel [(const_int 8) (const_int 9)
                       (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13)
                       (const_int 14) (const_int 15)]))))]
  "TARGET_AVX"
  "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11845
;; V16HI: replace the high half.  The result is elements 0-7 of operand 1
;; followed by the V8HI operand 2; emitted as vinsertf128 with immediate 1.
(define_insn "vec_set_hi_v16hi"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          (vec_select:V8HI
            (match_operand:V16HI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11863
;; V32QI: replace the low half.  The result is the V16QI operand 2 followed
;; by elements 16-31 of operand 1; emitted as vinsertf128 with immediate 0.
(define_insn "vec_set_lo_v32qi"
  [(set (match_operand:V32QI 0 "register_operand" "=x")
        (vec_concat:V32QI
          (match_operand:V16QI 2 "nonimmediate_operand" "xm")
          (vec_select:V16QI
            (match_operand:V32QI 1 "register_operand" "x")
            (parallel [(const_int 16) (const_int 17)
                       (const_int 18) (const_int 19)
                       (const_int 20) (const_int 21)
                       (const_int 22) (const_int 23)
                       (const_int 24) (const_int 25)
                       (const_int 26) (const_int 27)
                       (const_int 28) (const_int 29)
                       (const_int 30) (const_int 31)]))))]
  "TARGET_AVX"
  "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11885
;; V32QI: replace the high half.  The result is elements 0-15 of operand 1
;; followed by the V16QI operand 2; emitted as vinsertf128 with immediate 1.
(define_insn "vec_set_hi_v32qi"
  [(set (match_operand:V32QI 0 "register_operand" "=x")
        (vec_concat:V32QI
          (vec_select:V16QI
            (match_operand:V32QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)
                       (const_int 8) (const_int 9)
                       (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13)
                       (const_int 14) (const_int 15)]))
          (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11907
;; Masked vector load, instantiated for every mode in AVXMODEF2P.
;; Operand 0 (register) is set to an UNSPEC_MASKLOAD of:
;;   operand 1 - the memory source,
;;   operand 2 - a register in the same vector mode, passed to vmaskmov as
;;               its middle (mask) operand,
;;   (match_dup 0) - the previous contents of the destination, so the old
;;               value of operand 0 is also an input of the unspec.
;; Emitted as vmaskmov<ssemodesuffix>; only available when TARGET_AVX holds.
11908 (define_insn "avx_maskload<ssemodesuffix><avxmodesuffix>"
11909 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11910 (unspec:AVXMODEF2P
11911 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
11912 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11913 (match_dup 0)]
11914 UNSPEC_MASKLOAD))]
11915 "TARGET_AVX"
11916 "vmaskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
11917 [(set_attr "type" "sselog1")
11918 (set_attr "prefix_extra" "1")
11919 (set_attr "prefix" "vex")
11920 (set_attr "mode" "<MODE>")])
11921
;; Masked vector store, instantiated for every mode in AVXMODEF2P.
;; Operand 0 (memory) is set to an UNSPEC_MASKSTORE of register operands 1
;; and 2 plus (match_dup 0), i.e. the previous memory contents are also an
;; input of the unspec.  In the output template operand 1 is printed next to
;; the destination and operand 2 last (Intel order: %0, %1, %2).
;; Emitted as vmaskmov<ssemodesuffix>; only available when TARGET_AVX holds.
11922 (define_insn "avx_maskstore<ssemodesuffix><avxmodesuffix>"
11923 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
11924 (unspec:AVXMODEF2P
11925 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
11926 (match_operand:AVXMODEF2P 2 "register_operand" "x")
11927 (match_dup 0)]
11928 UNSPEC_MASKSTORE))]
11929 "TARGET_AVX"
11930 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11931 [(set_attr "type" "sselog1")
11932 (set_attr "prefix_extra" "1")
11933 (set_attr "prefix" "vex")
11934 (set_attr "mode" "<MODE>")])
11935
;; Cast from a half-width vector (<avxhalfvecmode>) to the full-width
;; AVX256MODE2P mode, represented as UNSPEC_CAST.
;; The output template is "#", so no assembly is emitted for the insn
;; itself; it is split once reload has completed (and TARGET_AVX holds).
;; The split code re-views operand 1 in <MODE>mode -- for a register it
;; builds a new REG rtx with the same register number, otherwise it uses
;; gen_lowpart -- and then emits an ordinary move into operand 0.
;; The two alternatives allow register<-register/memory and memory<-register.
11936 (define_insn_and_split "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
11937 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
11938 (unspec:AVX256MODE2P
11939 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "xm,x")]
11940 UNSPEC_CAST))]
11941 "TARGET_AVX"
11942 "#"
11943 "&& reload_completed"
11944 [(const_int 0)]
11945 {
11946 rtx op1 = operands[1];
11947 if (REG_P (op1))
11948 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
11949 else
11950 op1 = gen_lowpart (<MODE>mode, op1);
11951 emit_move_insn (operands[0], op1);
11952 DONE;
11953 })
11954
;; Vector initialization expander for every mode in AVX256MODE.
;; Operand 0 is the destination register; operand 1 is unconstrained here
;; (no predicate, no mode) and is handed as-is to ix86_expand_vector_init,
;; which does all the work.  The first argument passed is always false.
;; Only available when TARGET_AVX holds.
11955 (define_expand "vec_init<mode>"
11956 [(match_operand:AVX256MODE 0 "register_operand" "")
11957 (match_operand 1 "" "")]
11958 "TARGET_AVX"
11959 {
11960 ix86_expand_vector_init (false, operands[0], operands[1]);
11961 DONE;
11962 })
11963
;; Concatenate two half-width vectors into one AVX256MODE register.
;; Alternative 0: operand 2 is a register or memory ("xm"); emitted as
;;   vinsertf128 with immediate 0x1, with operand 1 printed via %t1.
;; Alternative 1: operand 2 matches constraint "C" (presumably the constant
;;   zero vector in the i386 port -- confirm against constraints.md); only
;;   operand 1 is moved, into %x0, using vmovaps for MODE_V8SF, vmovapd for
;;   MODE_V4DF and vmovdqa for every other value of the "mode" attribute.
;; The per-alternative attributes follow the same order: the insert form is
;; type sselog with an immediate byte, the move form is type ssemov.
;; Only available when TARGET_AVX holds.
11964 (define_insn "*vec_concat<mode>_avx"
11965 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
11966 (vec_concat:AVX256MODE
11967 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
11968 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
11969 "TARGET_AVX"
11970 {
11971 switch (which_alternative)
11972 {
11973 case 0:
11974 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
11975 case 1:
11976 switch (get_attr_mode (insn))
11977 {
11978 case MODE_V8SF:
11979 return "vmovaps\t{%1, %x0|%x0, %1}";
11980 case MODE_V4DF:
11981 return "vmovapd\t{%1, %x0|%x0, %1}";
11982 default:
11983 return "vmovdqa\t{%1, %x0|%x0, %1}";
11984 }
11985 default:
11986 gcc_unreachable ();
11987 }
11988 }
11989 [(set_attr "type" "sselog,ssemov")
11990 (set_attr "prefix_extra" "1,*")
11991 (set_attr "length_immediate" "1,*")
11992 (set_attr "prefix" "vex")
11993 (set_attr "mode" "<avxvecmode>")])
11994
;; Half-float to single-float conversion, 128-bit register form.
;; The conversion is modelled as an UNSPEC_VCVTPH2PS of the V8HI register
;; operand 1 yielding a V8SF value, of which the V4SF result keeps the
;; first four elements.  The vec_select must therefore name elements
;; 0, 1, 2, 3 in order, each exactly once; listing an index twice would
;; describe a result with a duplicated lane and a missing one.
;; Only available when TARGET_F16C holds.
11995 (define_insn "vcvtph2ps"
11996 [(set (match_operand:V4SF 0 "register_operand" "=x")
11997 (vec_select:V4SF
11998 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
11999 UNSPEC_VCVTPH2PS)
12000 (parallel [(const_int 0) (const_int 1)
12001 (const_int 2) (const_int 3)])))]
12002 "TARGET_F16C"
12003 "vcvtph2ps\t{%1, %0|%0, %1}"
12004 [(set_attr "type" "ssecvt")
12005 (set_attr "prefix" "vex")
12006 (set_attr "mode" "V4SF")])
12007
;; Half-float to single-float conversion, memory-source form.
;; A V4HI memory operand is converted directly to a V4SF register through
;; UNSPEC_VCVTPH2PS.  The leading '*' keeps the pattern unnamed, so it is
;; reached by pattern matching only.  Only available when TARGET_F16C holds.
;; NOTE(review): the "mode" attribute is V8SF although the result is V4SF
;; (the register form above uses V4SF) -- confirm whether this is intended.
12008 (define_insn "*vcvtph2ps_load"
12009 [(set (match_operand:V4SF 0 "register_operand" "=x")
12010 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12011 UNSPEC_VCVTPH2PS))]
12012 "TARGET_F16C"
12013 "vcvtph2ps\t{%1, %0|%0, %1}"
12014 [(set_attr "type" "ssecvt")
12015 (set_attr "prefix" "vex")
12016 (set_attr "mode" "V8SF")])
12017
;; Half-float to single-float conversion, 256-bit form.
;; All eight elements of the V8HI operand 1 (register or memory) are
;; converted to a V8SF register through UNSPEC_VCVTPH2PS.
;; Only available when TARGET_F16C holds.
12018 (define_insn "vcvtph2ps256"
12019 [(set (match_operand:V8SF 0 "register_operand" "=x")
12020 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12021 UNSPEC_VCVTPH2PS))]
12022 "TARGET_F16C"
12023 "vcvtph2ps\t{%1, %0|%0, %1}"
12024 [(set_attr "type" "ssecvt")
12025 (set_attr "prefix" "vex")
12026 (set_attr "mode" "V8SF")])
12027
;; Single-float to half-float conversion expander, 128-bit register form.
;; The V8HI result is the concatenation of the V4HI UNSPEC_VCVTPS2PH of
;; operand 1 (V4SF register) and operand 2 (SI immediate) with operand 3.
;; Operand 3 is not supplied by the caller: the preparation statement sets
;; it to the V4HImode zero constant, so the second half of the result is
;; zero.  Only available when TARGET_F16C holds.
12028 (define_expand "vcvtps2ph"
12029 [(set (match_operand:V8HI 0 "register_operand" "")
12030 (vec_concat:V8HI
12031 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12032 (match_operand:SI 2 "immediate_operand" "")]
12033 UNSPEC_VCVTPS2PH)
12034 (match_dup 3)))]
12035 "TARGET_F16C"
12036 "operands[3] = CONST0_RTX (V4HImode);")
12037
;; Single-float to half-float conversion, 128-bit register destination.
;; Matches a V8HI concatenation of the V4HI UNSPEC_VCVTPS2PH result with an
;; operand 3 that must satisfy const0_operand (a zero second half).
;; Operand 2 is an SI immediate restricted by constraint "N" and is printed
;; as the instruction's immediate.  Only available when TARGET_F16C holds.
12038 (define_insn "*vcvtps2ph"
12039 [(set (match_operand:V8HI 0 "register_operand" "=x")
12040 (vec_concat:V8HI
12041 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12042 (match_operand:SI 2 "immediate_operand" "N")]
12043 UNSPEC_VCVTPS2PH)
12044 (match_operand:V4HI 3 "const0_operand" "")))]
12045 "TARGET_F16C"
12046 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12047 [(set_attr "type" "ssecvt")
12048 (set_attr "prefix" "vex")
12049 (set_attr "mode" "V4SF")])
12050
;; Single-float to half-float conversion, memory destination.
;; The V4HI UNSPEC_VCVTPS2PH of operand 1 (V4SF register) and operand 2
;; (SI immediate, constraint "N") is stored directly to the V4HI memory
;; operand 0; no zero second half is involved in this form.
;; Only available when TARGET_F16C holds.
12051 (define_insn "*vcvtps2ph_store"
12052 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12053 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12054 (match_operand:SI 2 "immediate_operand" "N")]
12055 UNSPEC_VCVTPS2PH))]
12056 "TARGET_F16C"
12057 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12058 [(set_attr "type" "ssecvt")
12059 (set_attr "prefix" "vex")
12060 (set_attr "mode" "V4SF")])
12061
;; Single-float to half-float conversion, 256-bit source form.
;; The V8SF register operand 1 is converted, under the SI immediate
;; operand 2 (constraint "N"), to a V8HI result through UNSPEC_VCVTPS2PH.
;; Operand 0 may be a register or memory ("=xm").
;; Only available when TARGET_F16C holds.
12062 (define_insn "vcvtps2ph256"
12063 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12064 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12065 (match_operand:SI 2 "immediate_operand" "N")]
12066 UNSPEC_VCVTPS2PH))]
12067 "TARGET_F16C"
12068 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12069 [(set_attr "type" "ssecvt")
12070 (set_attr "prefix" "vex")
12071 (set_attr "mode" "V8SF")])