c359aed079192bf33965782542a6e9dd00710d9d
[gcc.git] / gcc / config / i386 / sse.md
1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 ;; Free Software Foundation, Inc.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
22 ;; Instruction suffix for sign and zero extensions.
;; Code attribute: <extsuffix> yields "sx" for sign_extend and "zx" for
;; zero_extend.
23 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
24
;; Whole-vector mode iterators.  The "*16" variants (SSEMODE16, AVXMODE16)
;; are the corresponding plain list with V1TI added.
25 ;; 16 byte integral modes handled by SSE
26 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
27
28 ;; All 16-byte vector modes handled by SSE
29 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
30 (define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
31
32 ;; 32 byte integral vector modes handled by AVX
33 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
34
35 ;; All 32-byte vector modes handled by AVX
36 (define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
37
38 ;; All QI vector modes handled by AVX
39 (define_mode_iterator AVXMODEQI [V32QI V16QI])
40
41 ;; All DI vector modes handled by AVX
42 (define_mode_iterator AVXMODEDI [V4DI V2DI])
43
44 ;; All vector modes handled by AVX
;; (the 16-byte modes followed by the 32-byte modes)
45 (define_mode_iterator AVXMODE
46 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
47 (define_mode_iterator AVXMODE16
48 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
49
50 ;; Mix-n-match
;; In the SSEMODE<digits> names below, each digit matches the element width
;; in bytes of one listed integer mode (1 = QI, 2 = HI, 4 = SI, 8 = DI).
51 (define_mode_iterator SSEMODE12 [V16QI V8HI])
52 (define_mode_iterator SSEMODE24 [V8HI V4SI])
53 (define_mode_iterator SSEMODE14 [V16QI V4SI])
54 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
55 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
56 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
;; Scalar and 128-bit vector float modes together.
57 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
;; The two 128-bit packed float modes.
58 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
59
;; Subsets of the 256-bit modes, and 128/256-bit float groupings.
60 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
61 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
62 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
63 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
64 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
65 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
66 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
67 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
68 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
69 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
70 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
71
;; Scalar, 128-bit and 256-bit float modes.
72 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
73
74 ;; Int-float size matches
;; Each iterator pairs a float vector mode with the integer vector mode of
;; the same element count.
75 (define_mode_iterator SSEMODE4S [V4SF V4SI])
76 (define_mode_iterator SSEMODE2D [V2DF V2DI])
77
78 ;; Modes handled by integer vcond pattern
;; V2DI is only enabled when TARGET_SSE4_2 holds.
79 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
80 (V2DI "TARGET_SSE4_2")])
81
;; Conditional iterators: every entry pairs a mode with the target condition
;; string under which patterns using the iterator are enabled for that mode.
82 ;; Modes handled by vec_extract_even/odd pattern.
83 (define_mode_iterator SSEMODE_EO
84 [(V4SF "TARGET_SSE")
85 (V2DF "TARGET_SSE2")
86 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
87 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
88 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
89
90 ;; Modes handled by storent patterns.
;; Note this list mixes scalar modes (SF, DF, SI) with vector modes.
91 (define_mode_iterator STORENT_MODE
92 [(SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
93 (SI "TARGET_SSE2") (V2DI "TARGET_SSE2") (V2DF "TARGET_SSE2")
94 (V4SF "TARGET_SSE")
95 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
96
97 ;; Modes handled by vector float patterns.
98 (define_mode_iterator VEC_FLOAT_MODE
99 [(V2DF "TARGET_SSE2") (V4SF "TARGET_SSE")
100 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
101
102 ;; Modes handled by vector extract patterns.
;; All 128-bit entries are gated on TARGET_SSE only; the 256-bit float
;; entries require TARGET_AVX.
103 (define_mode_iterator VEC_EXTRACT_MODE
104 [(V2DI "TARGET_SSE") (V4SI "TARGET_SSE")
105 (V8HI "TARGET_SSE") (V16QI "TARGET_SSE")
106 (V2DF "TARGET_SSE") (V4SF "TARGET_SSE")
107 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
108
109 ;; Mapping from float mode to required SSE level
;; Used in this file as the <sse> prefix of pattern names.
110 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
111
112 ;; Mapping from integer vector mode to mnemonic suffix
113 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
114
115 ;; Mapping of the insn mnemonic suffix
;; ssemodesuffix gives the packed suffix for vector modes; note that the
;; integer modes V8SI and V4DI map to the float suffixes "ps" and "pd".
116 (define_mode_attr ssemodesuffix
117 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd") (V8SF "ps") (V4DF "pd")
118 (V8SI "ps") (V4DI "pd")])
;; ssescalarmodesuffix gives the scalar-form suffix for the same modes.
119 (define_mode_attr ssescalarmodesuffix
120 [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd") (V8SF "ss") (V8SI "ss")
121 (V4DF "sd") (V4SI "d") (V4DI "sd")])
122
123 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one.
124 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
125
126 ;; Mapping of vector modes back to the scalar modes
;; Only 128-bit vector modes are listed here.
127 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
128 (V16QI "QI") (V8HI "HI")
129 (V4SI "SI") (V2DI "DI")])
130
131 ;; Mapping of vector modes to a vector mode of double size
;; Same element mode, twice the element count.
132 (define_mode_attr ssedoublesizemode
133 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
134 (V8HI "V16HI") (V16QI "V32QI")
135 (V4DF "V8DF") (V8SF "V16SF")
136 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
137
138 ;; Number of scalar elements in each vector type
139 (define_mode_attr ssescalarnum
140 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
141 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
142
143 ;; Mapping for AVX
;; avxvecmode: value used for the insn "mode" attribute.  128-bit integer
;; modes map to TI, 256-bit integer modes to OI, float modes to themselves.
144 (define_mode_attr avxvecmode
145 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
146 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
147 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
;; avxvecpsmode: single-precision float vector mode of the same total size
;; as the given integer vector mode.
148 (define_mode_attr avxvecpsmode
149 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
150 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; avxhalfvecmode: same element mode, half the element count.
151 (define_mode_attr avxhalfvecmode
152 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
153 (V8SF "V4SF") (V4DF "V2DF")
154 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
;; avxscalarmode: element mode, for both 128-bit and 256-bit vectors.
155 (define_mode_attr avxscalarmode
156 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
157 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
;; avxcvtvecmode: swaps between SF and SI element vectors of equal count.
158 (define_mode_attr avxcvtvecmode
159 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; avxpermvecmode: integer vector mode with the same layout as a float mode.
160 (define_mode_attr avxpermvecmode
161 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; avxmodesuffixp: "pd"/"ps"/"si" name fragment per mode.
162 (define_mode_attr avxmodesuffixp
163 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
164 (V4DF "pd")])
;; avxmodesuffix: empty for the listed 128-bit modes, "256" for the listed
;; 256-bit modes; used below to build pattern names.
165 (define_mode_attr avxmodesuffix
166 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
167 (V8SI "256") (V8SF "256") (V4DF "256")])
168
169 ;; Mapping of immediate bits for blend instructions
;; Each value equals 2^N - 1 for N elements (one mask bit per element).
170 (define_mode_attr blendbits
171 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
172
173 ;; Mapping of immediate bits for pinsr instructions
;; Each value equals 2^(N-1) for N elements, i.e. the single-bit mask of
;; the highest element index.
174 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
175
176 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
177
178 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
179 ;;
180 ;; Move patterns
181 ;;
182 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
183
;; Move expander for the 256-bit AVX modes.  Requires TARGET_AVX and hands
;; the whole expansion to ix86_expand_vector_move.
184 (define_expand "mov<mode>"
185 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
186 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
187 "TARGET_AVX"
188 {
189 ix86_expand_vector_move (<MODE>mode, operands);
190 DONE;
191 })
192
;; AVX move insn for every AVXMODE16 mode.  The condition requires at least
;; one operand to be a register.  Alternative 0 (constraint "C") is printed
;; by standard_sse_constant_opcode; alternatives 1 and 2 pick the mnemonic
;; from the insn's "mode" attribute.
193 (define_insn "*avx_mov<mode>_internal"
194 [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
195 (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
196 "TARGET_AVX
197 && (register_operand (operands[0], <MODE>mode)
198 || register_operand (operands[1], <MODE>mode))"
{
  enum attr_mode insn_mode;

  /* Constant source: let the shared helper choose the opcode.  */
  if (which_alternative == 0)
    return standard_sse_constant_opcode (insn, operands[1]);

  /* Only the load/reg-reg and store alternatives remain.  */
  if (which_alternative != 1 && which_alternative != 2)
    gcc_unreachable ();

  insn_mode = get_attr_mode (insn);

  /* Single-precision modes always use vmovaps; every other mode does too
     when the tuning flag prefers packed-single moves.  */
  if (insn_mode == MODE_V8SF || insn_mode == MODE_V4SF
      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
    return "vmovaps\t{%1, %0|%0, %1}";

  if (insn_mode == MODE_V4DF || insn_mode == MODE_V2DF)
    return "vmovapd\t{%1, %0|%0, %1}";

  return "vmovdqa\t{%1, %0|%0, %1}";
}
227 [(set_attr "type" "sselog1,ssemov,ssemov")
228 (set_attr "prefix" "vex")
229 (set_attr "mode" "<avxvecmode>")])
230
231 ;; All of these patterns are enabled for SSE1 as well as SSE2.
232 ;; This is essential for maintaining stable calling conventions.
233
;; Move expander for the 128-bit modes (including V1TI).  Requires only
;; TARGET_SSE and hands the expansion to ix86_expand_vector_move.
234 (define_expand "mov<mode>"
235 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
236 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
237 "TARGET_SSE"
238 {
239 ix86_expand_vector_move (<MODE>mode, operands);
240 DONE;
241 })
242
;; Non-AVX 128-bit move insn.  The condition requires at least one operand
;; to be a register.  Alternative 0 (constraint "C") is printed by
;; standard_sse_constant_opcode; the other two alternatives choose between
;; movaps, movapd and movdqa according to the computed "mode" attribute.
243 (define_insn "*mov<mode>_internal"
244 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
245 (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
246 "TARGET_SSE
247 && (register_operand (operands[0], <MODE>mode)
248 || register_operand (operands[1], <MODE>mode))"
{
  enum attr_mode insn_mode;

  /* Constant source: let the shared helper choose the opcode.  */
  if (which_alternative == 0)
    return standard_sse_constant_opcode (insn, operands[1]);

  /* Only the load/reg-reg and store alternatives remain.  */
  if (which_alternative != 1 && which_alternative != 2)
    gcc_unreachable ();

  insn_mode = get_attr_mode (insn);

  /* V4SF always uses movaps; every other mode does too when the tuning
     flag prefers packed-single moves.  */
  if (insn_mode == MODE_V4SF || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
    return "movaps\t{%1, %0|%0, %1}";

  if (insn_mode == MODE_V2DF)
    return "movapd\t{%1, %0|%0, %1}";

  return "movdqa\t{%1, %0|%0, %1}";
}
275 [(set_attr "type" "sselog1,ssemov,ssemov")
;; "mode" is V4SF when optimizing for size, when SSE2 is unavailable, or for
;; the store alternative under TARGET_SSE_TYPELESS_STORES; otherwise it
;; follows the float vector mode, and is TI for everything else.
276 (set (attr "mode")
277 (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
278 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
279 (and (eq_attr "alternative" "2")
280 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
281 (const_int 0))))
282 (const_string "V4SF")
283 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
284 (const_string "V4SF")
285 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
286 (const_string "V2DF")
287 ]
288 (const_string "TI")))])
289
290 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
291 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
292 ;; from memory, we'd prefer to load the memory directly into the %xmm
293 ;; register. To facilitate this happy circumstance, this pattern won't
294 ;; split until after register allocation. If the 64-bit value didn't
295 ;; come from memory, this is the best we can do. This is much better
296 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
297 ;; from there.
298
;; Emitted as "#" and split only once reload_completed holds.  Enabled for
;; 32-bit targets with SSE2 and TARGET_INTER_UNIT_MOVES.  Operand 2 is a
;; scratch vector register, needed only by the register alternative (the
;; memory alternative uses constraint "X" for it).
299 (define_insn_and_split "movdi_to_sse"
300 [(parallel
301 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
302 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
303 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
304 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
305 "#"
306 "&& reload_completed"
307 [(const_int 0)]
308 {
309 if (register_operand (operands[1], DImode))
310 {
311 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
312 Assemble the 64-bit DImode value in an xmm register. */
/* The SImode subreg at byte 0 goes to operand 0 and the one at byte 4 to
   the scratch; the two are then combined with vec_interleave_lowv4si.  */
313 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
314 gen_rtx_SUBREG (SImode, operands[1], 0)));
315 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
316 gen_rtx_SUBREG (SImode, operands[1], 4)));
317 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
318 operands[2]));
319 }
320 else if (memory_operand (operands[1], DImode))
/* Memory source: concatenate the DImode memory with const0_rtx, writing
   operand 0 viewed as V2DImode.  */
321 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
322 operands[1], const0_rtx));
323 else
324 gcc_unreachable ();
325 })
326
;; After reload, rewrite a V4SF register set whose source satisfies
;; zero_extended_scalar_load_operand as a vec_merge (mask 1) of the
;; duplicated SFmode subreg of the source into an all-zero V4SF vector.
327 (define_split
328 [(set (match_operand:V4SF 0 "register_operand" "")
329 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
330 "TARGET_SSE && reload_completed"
331 [(set (match_dup 0)
332 (vec_merge:V4SF
333 (vec_duplicate:V4SF (match_dup 1))
334 (match_dup 2)
335 (const_int 1)))]
336 {
337 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
338 operands[2] = CONST0_RTX (V4SFmode);
339 })
340
;; V2DF counterpart of the split above: after reload the source becomes a
;; vec_concat of its DFmode subreg at byte 0 with a DFmode zero.
341 (define_split
342 [(set (match_operand:V2DF 0 "register_operand" "")
343 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
344 "TARGET_SSE2 && reload_completed"
345 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
346 {
347 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
348 operands[2] = CONST0_RTX (DFmode);
349 })
350
;; Push expander for 256-bit AVX register operands; delegates entirely to
;; ix86_expand_push.
351 (define_expand "push<mode>1"
352 [(match_operand:AVX256MODE 0 "register_operand" "")]
353 "TARGET_AVX"
354 {
355 ix86_expand_push (<MODE>mode, operands[0]);
356 DONE;
357 })
358
;; Push expander for 128-bit register operands; delegates entirely to
;; ix86_expand_push.
359 (define_expand "push<mode>1"
360 [(match_operand:SSEMODE16 0 "register_operand" "")]
361 "TARGET_SSE"
362 {
363 ix86_expand_push (<MODE>mode, operands[0]);
364 DONE;
365 })
366
;; Misaligned-move expander for the 256-bit AVX modes; delegates to
;; ix86_expand_vector_move_misalign.
367 (define_expand "movmisalign<mode>"
368 [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
369 (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
370 "TARGET_AVX"
371 {
372 ix86_expand_vector_move_misalign (<MODE>mode, operands);
373 DONE;
374 })
375
;; Misaligned-move expander for the 128-bit modes; delegates to
;; ix86_expand_vector_move_misalign.
376 (define_expand "movmisalign<mode>"
377 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
378 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
379 "TARGET_SSE"
380 {
381 ix86_expand_vector_move_misalign (<MODE>mode, operands);
382 DONE;
383 })
384
;; Expander for the UNSPEC_MOVU float move in AVX form.  If both operands
;; are memory, the source is first forced into a register so the result
;; satisfies the matching insn's condition.
385 (define_expand "avx_movu<ssemodesuffix><avxmodesuffix>"
386 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "")
387 (unspec:AVXMODEF2P
388 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "")]
389 UNSPEC_MOVU))]
390 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
391 {
392 if (MEM_P (operands[0]) && MEM_P (operands[1]))
393 operands[1] = force_reg (<MODE>mode, operands[1]);
394 })
395
;; vmovups/vmovupd: load (or reg-reg) and store alternatives.  The condition
;; rejects memory-to-memory operands.
396 (define_insn "*avx_movu<ssemodesuffix><avxmodesuffix>"
397 [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
398 (unspec:AVXMODEF2P
399 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
400 UNSPEC_MOVU))]
401 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
402 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
403 "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
404 [(set_attr "type" "ssemov")
405 (set_attr "movu" "1")
406 (set_attr "prefix" "vex")
407 (set_attr "mode" "<MODE>")])
408
;; movq between vector registers or from memory: the result is element 0 of
;; operand 1 concatenated with (const_int 0).  "%v" together with the
;; "maybe_vex" prefix attribute lets one template serve both encodings.
409 (define_insn "sse2_movq128"
410 [(set (match_operand:V2DI 0 "register_operand" "=x")
411 (vec_concat:V2DI
412 (vec_select:DI
413 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
414 (parallel [(const_int 0)]))
415 (const_int 0)))]
416 "TARGET_SSE2"
417 "%vmovq\t{%1, %0|%0, %1}"
418 [(set_attr "type" "ssemov")
419 (set_attr "prefix" "maybe_vex")
420 (set_attr "mode" "TI")])
421
;; Non-AVX expander for the UNSPEC_MOVU float move (named sse_movups /
;; sse2_movupd via <sse> and <ssemodesuffix>).  A memory-to-memory request
;; has its source forced into a register.
422 (define_expand "<sse>_movu<ssemodesuffix>"
423 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "")
424 (unspec:SSEMODEF2P
425 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")]
426 UNSPEC_MOVU))]
427 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
428 {
429 if (MEM_P (operands[0]) && MEM_P (operands[1]))
430 operands[1] = force_reg (<MODE>mode, operands[1]);
431 })
432
;; movups/movupd: load (or reg-reg) and store alternatives.  The condition
;; rejects memory-to-memory operands.
433 (define_insn "*<sse>_movu<ssemodesuffix>"
434 [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
435 (unspec:SSEMODEF2P
436 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
437 UNSPEC_MOVU))]
438 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
439 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
440 "movu<ssemodesuffix>\t{%1, %0|%0, %1}"
441 [(set_attr "type" "ssemov")
442 (set_attr "movu" "1")
443 (set_attr "mode" "<MODE>")])
444
;; Expander for the UNSPEC_MOVU byte-vector move in AVX form (V16QI and
;; V32QI).  A memory-to-memory request has its source forced into a register.
445 (define_expand "avx_movdqu<avxmodesuffix>"
446 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "")
447 (unspec:AVXMODEQI
448 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "")]
449 UNSPEC_MOVU))]
450 "TARGET_AVX"
451 {
452 if (MEM_P (operands[0]) && MEM_P (operands[1]))
453 operands[1] = force_reg (<MODE>mode, operands[1]);
454 })
455
;; vmovdqu: load (or reg-reg) and store alternatives; memory-to-memory is
;; excluded by the condition.
456 (define_insn "*avx_movdqu<avxmodesuffix>"
457 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
458 (unspec:AVXMODEQI
459 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
460 UNSPEC_MOVU))]
461 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
462 "vmovdqu\t{%1, %0|%0, %1}"
463 [(set_attr "type" "ssemov")
464 (set_attr "movu" "1")
465 (set_attr "prefix" "vex")
466 (set_attr "mode" "<avxvecmode>")])
467
;; SSE2 expander for the V16QI UNSPEC_MOVU move.  A memory-to-memory request
;; has its source forced into a register.
468 (define_expand "sse2_movdqu"
469 [(set (match_operand:V16QI 0 "nonimmediate_operand" "")
470 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "")]
471 UNSPEC_MOVU))]
472 "TARGET_SSE2"
473 {
474 if (MEM_P (operands[0]) && MEM_P (operands[1]))
475 operands[1] = force_reg (V16QImode, operands[1]);
476 })
477
;; movdqu: load (or reg-reg) and store alternatives; memory-to-memory is
;; excluded by the condition.
478 (define_insn "*sse2_movdqu"
479 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
480 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
481 UNSPEC_MOVU))]
482 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
483 "movdqu\t{%1, %0|%0, %1}"
484 [(set_attr "type" "ssemov")
485 (set_attr "movu" "1")
486 (set_attr "prefix_data16" "1")
487 (set_attr "mode" "TI")])
488
;; UNSPEC_MOVNT store of a float vector register to memory in AVX form
;; (vmovntps / vmovntpd).
489 (define_insn "avx_movnt<mode>"
490 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
491 (unspec:AVXMODEF2P
492 [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
493 UNSPEC_MOVNT))]
494 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
495 "vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
496 [(set_attr "type" "ssemov")
497 (set_attr "prefix" "vex")
498 (set_attr "mode" "<MODE>")])
499
;; UNSPEC_MOVNT store of a 128-bit float vector register to memory
;; (movntps / movntpd).
500 (define_insn "<sse>_movnt<mode>"
501 [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
502 (unspec:SSEMODEF2P
503 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
504 UNSPEC_MOVNT))]
505 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
506 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
507 [(set_attr "type" "ssemov")
508 (set_attr "mode" "<MODE>")])
509
;; UNSPEC_MOVNT store of a V2DI/V4DI register to memory in AVX form
;; (vmovntdq).  This is a plain register-to-memory store, so it is
;; classified as "ssemov" like the other movnt patterns in this file
;; (sse2_movntv2di, <sse>_movnt<mode>, the float avx_movnt<mode>) rather
;; than as a conversion.
510 (define_insn "avx_movnt<mode>"
511 [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
512 (unspec:AVXMODEDI
513 [(match_operand:AVXMODEDI 1 "register_operand" "x")]
514 UNSPEC_MOVNT))]
515 "TARGET_AVX"
516 "vmovntdq\t{%1, %0|%0, %1}"
517 [(set_attr "type" "ssemov")
518 (set_attr "prefix" "vex")
519 (set_attr "mode" "<avxvecmode>")])
520
;; UNSPEC_MOVNT store of a V2DI register to memory (movntdq).
521 (define_insn "sse2_movntv2di"
522 [(set (match_operand:V2DI 0 "memory_operand" "=m")
523 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
524 UNSPEC_MOVNT))]
525 "TARGET_SSE2"
526 "movntdq\t{%1, %0|%0, %1}"
527 [(set_attr "type" "ssemov")
528 (set_attr "prefix_data16" "1")
529 (set_attr "mode" "TI")])
530
;; UNSPEC_MOVNT store of an SImode general register (constraint "r") to
;; memory (movnti).  The operands are SImode, so the "mode" attribute is SI;
;; prefix_data16 stays explicitly 0.
531 (define_insn "sse2_movntsi"
532 [(set (match_operand:SI 0 "memory_operand" "=m")
533 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
534 UNSPEC_MOVNT))]
535 "TARGET_SSE2"
536 "movnti\t{%1, %0|%0, %1}"
537 [(set_attr "type" "ssemov")
538 (set_attr "prefix_data16" "0")
539 (set_attr "mode" "SI")])
540
;; UNSPEC_LDDQU load of a V16QI/V32QI vector from memory in AVX form
;; (vlddqu).  It is a load, so it is classified as "ssemov", matching the
;; sse3_lddqu pattern for the same operation.
541 (define_insn "avx_lddqu<avxmodesuffix>"
542 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
543 (unspec:AVXMODEQI
544 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
545 UNSPEC_LDDQU))]
546 "TARGET_AVX"
547 "vlddqu\t{%1, %0|%0, %1}"
548 [(set_attr "type" "ssemov")
549 (set_attr "movu" "1")
550 (set_attr "prefix" "vex")
551 (set_attr "mode" "<avxvecmode>")])
552
;; UNSPEC_LDDQU load of a V16QI vector from memory (lddqu).  The encoding
;; attributes are given explicitly: no data16 prefix, rep prefix present.
553 (define_insn "sse3_lddqu"
554 [(set (match_operand:V16QI 0 "register_operand" "=x")
555 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
556 UNSPEC_LDDQU))]
557 "TARGET_SSE3"
558 "lddqu\t{%1, %0|%0, %1}"
559 [(set_attr "type" "ssemov")
560 (set_attr "movu" "1")
561 (set_attr "prefix_data16" "0")
562 (set_attr "prefix_rep" "1")
563 (set_attr "mode" "TI")])
564
565 ; Expand patterns for non-temporal stores. At the moment, only those
566 ; that directly map to insns are defined; it would be possible to
567 ; define patterns for other modes that would expand to several insns.
568
;; Produces the UNSPEC_MOVNT store RTL directly; there is no preparation
;; code.  Availability per mode comes from the conditions attached to the
;; STORENT_MODE iterator entries.
569 (define_expand "storent<mode>"
570 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
571 (unspec:STORENT_MODE
572 [(match_operand:STORENT_MODE 1 "register_operand" "")]
573 UNSPEC_MOVNT))])
574
575 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
576 ;;
577 ;; Parallel floating point arithmetic
578 ;;
579 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
580
;; abs/neg expander for the vector float modes.  The expander's own
;; condition is empty (the VEC_FLOAT_MODE entries carry the per-mode
;; conditions); expansion is done by ix86_expand_fp_absneg_operator.
581 (define_expand "<code><mode>2"
582 [(set (match_operand:VEC_FLOAT_MODE 0 "register_operand" "")
583 (absneg:VEC_FLOAT_MODE
584 (match_operand:VEC_FLOAT_MODE 1 "register_operand" "")))]
585 ""
586 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
587
;; AVX abs/neg with the mask carried in a (use) as operand 2.  Emitted as
;; "#" and split after reload into a single logical operation: XOR with the
;; mask for NEG, AND with the mask otherwise.
588 (define_insn_and_split "*avx_absneg<mode>2"
589 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
590 (match_operator:AVXMODEF2P 3 "absneg_operator"
591 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "x,m")]))
592 (use (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm,x"))]
593 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
594 "#"
595 "&& reload_completed"
596 [(const_int 0)]
{
  enum rtx_code logic_code = GET_CODE (operands[3]) == NEG ? XOR : AND;
  rtx first = operands[1];
  rtx second = operands[2];
  rtx logic_op;

  /* When the value operand is in memory, put it second so the first
     source is operand 2.  */
  if (MEM_P (operands[1]))
    {
      first = operands[2];
      second = operands[1];
    }

  logic_op = gen_rtx_fmt_ee (logic_code, <MODE>mode, first, second);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], logic_op));
  DONE;
})
610
;; Two-operand SSE abs/neg with the mask carried in a (use) as operand 2.
;; The constraints tie the destination to either operand 1 or operand 2.
;; Split after reload into "op0 = op0 <XOR|AND> other", where "other" is
;; whichever of operands 1 and 2 is not the destination.
611 (define_insn_and_split "*sse_absneg<mode>2"
612 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
613 (match_operator:SSEMODEF2P 3 "absneg_operator"
614 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,xm")]))
615 (use (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm,0"))]
616 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
617 "#"
618 "&& reload_completed"
619 [(const_int 0)]
{
  enum rtx_code logic_code = GET_CODE (operands[3]) == NEG ? XOR : AND;
  rtx other_src;
  rtx logic_op;

  /* Operand 0 is always the first source; pick the remaining one.  */
  if (rtx_equal_p (operands[0], operands[1]))
    other_src = operands[2];
  else
    other_src = operands[1];

  logic_op = gen_rtx_fmt_ee (logic_code, <MODE>mode, operands[0], other_src);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], logic_op));
  DONE;
})
630
;; add/sub expander for the 256-bit float modes.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy; the pattern itself is then emitted
;; (there is no DONE).
631 (define_expand "<plusminus_insn><mode>3"
632 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
633 (plusminus:AVX256MODEF2P
634 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
635 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
636 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
637 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
638
;; Three-operand AVX packed add/sub for 128- and 256-bit float modes.
;; <comm> supplies the commutativity marker for operand 1's constraint.
639 (define_insn "*avx_<plusminus_insn><mode>3"
640 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
641 (plusminus:AVXMODEF2P
642 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
643 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
644 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
645 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
646 "v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
647 [(set_attr "type" "sseadd")
648 (set_attr "prefix" "vex")
649 (set_attr "mode" "<avxvecmode>")])
650
;; add/sub expander for the 128-bit float modes.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy; the pattern itself is then emitted.
651 (define_expand "<plusminus_insn><mode>3"
652 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
653 (plusminus:SSEMODEF2P
654 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
655 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
656 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
657 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
658
;; Two-operand SSE packed add/sub: operand 1 is tied to the destination
;; (constraint "0"), so only %0 and %2 appear in the template.
659 (define_insn "*<plusminus_insn><mode>3"
660 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
661 (plusminus:SSEMODEF2P
662 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
663 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
664 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
665 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
666 "<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}"
667 [(set_attr "type" "sseadd")
668 (set_attr "mode" "<MODE>")])
669
;; AVX scalar add/sub on element 0: the vec_merge with mask 1 takes element
;; 0 from the arithmetic result and all other elements from operand 1.
670 (define_insn "*avx_vm<plusminus_insn><mode>3"
671 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
672 (vec_merge:SSEMODEF2P
673 (plusminus:SSEMODEF2P
674 (match_operand:SSEMODEF2P 1 "register_operand" "x")
675 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
676 (match_dup 1)
677 (const_int 1)))]
678 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
679 "v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
680 [(set_attr "type" "sseadd")
681 (set_attr "prefix" "vex")
682 (set_attr "mode" "<ssescalarmode>")])
683
;; SSE scalar add/sub on element 0, with operand 1 tied to the destination.
;; The vec_merge with mask 1 keeps the remaining elements from operand 1.
684 (define_insn "<sse>_vm<plusminus_insn><mode>3"
685 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
686 (vec_merge:SSEMODEF2P
687 (plusminus:SSEMODEF2P
688 (match_operand:SSEMODEF2P 1 "register_operand" "0")
689 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
690 (match_dup 1)
691 (const_int 1)))]
692 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
693 "<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
694 [(set_attr "type" "sseadd")
695 (set_attr "mode" "<ssescalarmode>")])
696
;; Multiply expander for the 256-bit float modes.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy; the pattern itself is then emitted.
697 (define_expand "mul<mode>3"
698 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
699 (mult:AVX256MODEF2P
700 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
701 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
702 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
703 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
704
;; Three-operand AVX packed multiply; operand 1 is marked commutative ("%").
705 (define_insn "*avx_mul<mode>3"
706 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
707 (mult:AVXMODEF2P
708 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
709 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
710 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
711 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
712 "vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
713 [(set_attr "type" "ssemul")
714 (set_attr "prefix" "vex")
715 (set_attr "mode" "<avxvecmode>")])
716
;; Multiply expander for the 128-bit float modes.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy; the pattern itself is then emitted.
717 (define_expand "mul<mode>3"
718 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
719 (mult:SSEMODEF2P
720 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
721 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
722 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
723 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
724
;; Two-operand SSE packed multiply; operand 1 is commutative and tied to
;; the destination ("%0").
725 (define_insn "*mul<mode>3"
726 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
727 (mult:SSEMODEF2P
728 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
729 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
730 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
731 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
732 "mul<ssemodesuffix>\t{%2, %0|%0, %2}"
733 [(set_attr "type" "ssemul")
734 (set_attr "mode" "<MODE>")])
735
;; AVX scalar multiply on element 0: the vec_merge with mask 1 takes element
;; 0 from the product and all other elements from operand 1.  The pattern
;; iterates over the 128-bit modes only, so it uses the 128-bit condition
;; macro, as the sibling *avx_vm<plusminus_insn><mode>3 and *avx_vmdiv<mode>3
;; patterns do.
736 (define_insn "*avx_vmmul<mode>3"
737 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
738 (vec_merge:SSEMODEF2P
739 (mult:SSEMODEF2P
740 (match_operand:SSEMODEF2P 1 "register_operand" "x")
741 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
742 (match_dup 1)
743 (const_int 1)))]
744 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
745 "vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
746 [(set_attr "type" "ssemul")
747 (set_attr "prefix" "vex")
748 (set_attr "mode" "<ssescalarmode>")])
749
;; SSE scalar multiply on element 0, with operand 1 tied to the destination.
;; The vec_merge with mask 1 keeps the remaining elements from operand 1.
750 (define_insn "<sse>_vmmul<mode>3"
751 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
752 (vec_merge:SSEMODEF2P
753 (mult:SSEMODEF2P
754 (match_operand:SSEMODEF2P 1 "register_operand" "0")
755 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
756 (match_dup 1)
757 (const_int 1)))]
758 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
759 "mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
760 [(set_attr "type" "ssemul")
761 (set_attr "mode" "<ssescalarmode>")])
762
;; V8SF division expander.  After fixing up the operands it emits
;; ix86_emit_swdivsf instead of the div pattern when all of these hold:
;; SSE math, TARGET_RECIP, not optimizing the insn for size, finite-math-only,
;; no trapping math, and unsafe math optimizations.  Otherwise the div RTL
;; in the template is emitted.
763 (define_expand "divv8sf3"
764 [(set (match_operand:V8SF 0 "register_operand" "")
765 (div:V8SF (match_operand:V8SF 1 "register_operand" "")
766 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
767 "TARGET_AVX"
768 {
769 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
770
771 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
772 && flag_finite_math_only && !flag_trapping_math
773 && flag_unsafe_math_optimizations)
774 {
775 ix86_emit_swdivsf (operands[0], operands[1],
776 operands[2], V8SFmode);
777 DONE;
778 }
779 })
780
;; V4DF division expander: only fixes up the operands, then emits the div
;; RTL from the template.
781 (define_expand "divv4df3"
782 [(set (match_operand:V4DF 0 "register_operand" "")
783 (div:V4DF (match_operand:V4DF 1 "register_operand" "")
784 (match_operand:V4DF 2 "nonimmediate_operand" "")))]
785 "TARGET_AVX"
786 "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
787
;; Three-operand AVX packed divide for 128- and 256-bit float modes.  The
;; dividend must be a register; the divisor may be memory.
788 (define_insn "avx_div<mode>3"
789 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
790 (div:AVXMODEF2P
791 (match_operand:AVXMODEF2P 1 "register_operand" "x")
792 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
793 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
794 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
795 [(set_attr "type" "ssediv")
796 (set_attr "prefix" "vex")
797 (set_attr "mode" "<MODE>")])
798
;; V4SF division expander.  Emits ix86_emit_swdivsf instead of the div
;; pattern when all of these hold: SSE math, TARGET_RECIP, optimizing the
;; insn for speed, finite-math-only, no trapping math, and unsafe math
;; optimizations.  Unlike divv8sf3, no operand fixup is performed first.
799 (define_expand "divv4sf3"
800 [(set (match_operand:V4SF 0 "register_operand" "")
801 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
802 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
803 "TARGET_SSE"
804 {
805 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
806 && flag_finite_math_only && !flag_trapping_math
807 && flag_unsafe_math_optimizations)
808 {
809 ix86_emit_swdivsf (operands[0], operands[1],
810 operands[2], V4SFmode);
811 DONE;
812 }
813 })
814
;; V2DF division expander with no preparation code: the div RTL in the
;; template is emitted as-is.
815 (define_expand "divv2df3"
816 [(set (match_operand:V2DF 0 "register_operand" "")
817 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
818 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
819 "TARGET_SSE2")
820
;; Three-operand AVX packed divide restricted to the 128-bit float modes.
821 (define_insn "*avx_div<mode>3"
822 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
823 (div:SSEMODEF2P
824 (match_operand:SSEMODEF2P 1 "register_operand" "x")
825 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
826 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
827 "vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
828 [(set_attr "type" "ssediv")
829 (set_attr "prefix" "vex")
830 (set_attr "mode" "<MODE>")])
831
;; Two-operand SSE packed divide; the dividend is tied to the destination.
832 (define_insn "<sse>_div<mode>3"
833 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
834 (div:SSEMODEF2P
835 (match_operand:SSEMODEF2P 1 "register_operand" "0")
836 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
837 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
838 "div<ssemodesuffix>\t{%2, %0|%0, %2}"
839 [(set_attr "type" "ssediv")
840 (set_attr "mode" "<MODE>")])
841
;; AVX scalar divide on element 0: the vec_merge with mask 1 takes element 0
;; from the quotient and all other elements from operand 1.
842 (define_insn "*avx_vmdiv<mode>3"
843 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
844 (vec_merge:SSEMODEF2P
845 (div:SSEMODEF2P
846 (match_operand:SSEMODEF2P 1 "register_operand" "x")
847 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
848 (match_dup 1)
849 (const_int 1)))]
850 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
851 "vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
852 [(set_attr "type" "ssediv")
853 (set_attr "prefix" "vex")
854 (set_attr "mode" "<ssescalarmode>")])
855
;; SSE scalar divide on element 0, with the dividend tied to the
;; destination.  The vec_merge with mask 1 keeps the remaining elements from
;; operand 1.
856 (define_insn "<sse>_vmdiv<mode>3"
857 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
858 (vec_merge:SSEMODEF2P
859 (div:SSEMODEF2P
860 (match_operand:SSEMODEF2P 1 "register_operand" "0")
861 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
862 (match_dup 1)
863 (const_int 1)))]
864 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
865 "div<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
866 [(set_attr "type" "ssediv")
867 (set_attr "mode" "<ssescalarmode>")])
868
;; 256-bit vrcpps, modelled as UNSPEC_RCP of a V8SF register or memory
;; operand.
869 (define_insn "avx_rcpv8sf2"
870 [(set (match_operand:V8SF 0 "register_operand" "=x")
871 (unspec:V8SF
872 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
873 "TARGET_AVX"
874 "vrcpps\t{%1, %0|%0, %1}"
875 [(set_attr "type" "sse")
876 (set_attr "prefix" "vex")
877 (set_attr "mode" "V8SF")])
878
;; 128-bit packed single-precision reciprocal, modelled as UNSPEC_RCP.
;; The "%v" in the template goes together with the "maybe_vex" prefix
;; attribute (presumably selecting the VEX mnemonic when AVX is enabled
;; -- the %v handling is defined outside this chunk).
879 (define_insn "sse_rcpv4sf2"
880 [(set (match_operand:V4SF 0 "register_operand" "=x")
881 (unspec:V4SF
882 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
883 "TARGET_SSE"
884 "%vrcpps\t{%1, %0|%0, %1}"
885 [(set_attr "type" "sse")
886 (set_attr "atom_sse_attr" "rcp")
887 (set_attr "prefix" "maybe_vex")
888 (set_attr "mode" "V4SF")])
889
;; Scalar single-precision reciprocal, AVX encoding.  Element 0 comes
;; from UNSPEC_RCP of operand 1; the other elements are copied from
;; operand 2 (vec_merge mask 1).
890 (define_insn "*avx_vmrcpv4sf2"
891 [(set (match_operand:V4SF 0 "register_operand" "=x")
892 (vec_merge:V4SF
893 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
894 UNSPEC_RCP)
895 (match_operand:V4SF 2 "register_operand" "x")
896 (const_int 1)))]
897 "TARGET_AVX"
898 "vrcpss\t{%1, %2, %0|%0, %2, %1}"
899 [(set_attr "type" "sse")
900 (set_attr "prefix" "vex")
901 (set_attr "mode" "SF")])
902
;; Scalar single-precision reciprocal, legacy SSE encoding.  Operand 2
;; (source of the untouched upper elements) is tied to the destination,
;; so only operand 1 and the destination appear in the template.
903 (define_insn "sse_vmrcpv4sf2"
904 [(set (match_operand:V4SF 0 "register_operand" "=x")
905 (vec_merge:V4SF
906 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
907 UNSPEC_RCP)
908 (match_operand:V4SF 2 "register_operand" "0")
909 (const_int 1)))]
910 "TARGET_SSE"
911 "rcpss\t{%1, %0|%0, %1}"
912 [(set_attr "type" "sse")
913 (set_attr "atom_sse_attr" "rcp")
914 (set_attr "mode" "SF")])
915
;; Expander for V8SF square root.  When SSE math, TARGET_RECIP,
;; finite-math-only, non-trapping and unsafe-math flags are all in effect
;; and the insn is not being optimized for size, the expansion is
;; delegated to ix86_emit_swsqrtsf (last argument 0) and the default
;; pattern is skipped via DONE; otherwise the plain sqrt RTL is emitted.
916 (define_expand "sqrtv8sf2"
917 [(set (match_operand:V8SF 0 "register_operand" "")
918 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
919 "TARGET_AVX"
920 {
921 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
922 && flag_finite_math_only && !flag_trapping_math
923 && flag_unsafe_math_optimizations)
924 {
925 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
926 DONE;
927 }
928 })
929
;; 256-bit packed single-precision square root (vsqrtps), AVX only.
930 (define_insn "avx_sqrtv8sf2"
931 [(set (match_operand:V8SF 0 "register_operand" "=x")
932 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
933 "TARGET_AVX"
934 "vsqrtps\t{%1, %0|%0, %1}"
935 [(set_attr "type" "sse")
936 (set_attr "prefix" "vex")
937 (set_attr "mode" "V8SF")])
938
;; Expander for V4SF square root.  Under SSE math with TARGET_RECIP,
;; finite-math-only, non-trapping and unsafe-math flags, and when the
;; insn is optimized for speed, the expansion is delegated to
;; ix86_emit_swsqrtsf (last argument 0); otherwise the plain sqrt RTL
;; is emitted.
939 (define_expand "sqrtv4sf2"
940 [(set (match_operand:V4SF 0 "register_operand" "")
941 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
942 "TARGET_SSE"
943 {
944 if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
945 && flag_finite_math_only && !flag_trapping_math
946 && flag_unsafe_math_optimizations)
947 {
948 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
949 DONE;
950 }
951 })
952
;; 128-bit packed single-precision square root.  One pattern serves both
;; encodings: "%v" in the template with the "maybe_vex" prefix attribute.
953 (define_insn "sse_sqrtv4sf2"
954 [(set (match_operand:V4SF 0 "register_operand" "=x")
955 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
956 "TARGET_SSE"
957 "%vsqrtps\t{%1, %0|%0, %1}"
958 [(set_attr "type" "sse")
959 (set_attr "atom_sse_attr" "sqrt")
960 (set_attr "prefix" "maybe_vex")
961 (set_attr "mode" "V4SF")])
962
;; 256-bit packed double-precision square root (vsqrtpd), AVX only.
;; Named without a target prefix, so it is the standard sqrt pattern
;; for V4DF directly (there is no separate expander in this chunk).
963 (define_insn "sqrtv4df2"
964 [(set (match_operand:V4DF 0 "register_operand" "=x")
965 (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
966 "TARGET_AVX"
967 "vsqrtpd\t{%1, %0|%0, %1}"
968 [(set_attr "type" "sse")
969 (set_attr "prefix" "vex")
970 (set_attr "mode" "V4DF")])
971
;; 128-bit packed double-precision square root; requires SSE2.
;; "%v" plus prefix "maybe_vex" covers both the legacy and VEX forms.
972 (define_insn "sqrtv2df2"
973 [(set (match_operand:V2DF 0 "register_operand" "=x")
974 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
975 "TARGET_SSE2"
976 "%vsqrtpd\t{%1, %0|%0, %1}"
977 [(set_attr "type" "sse")
978 (set_attr "prefix" "maybe_vex")
979 (set_attr "mode" "V2DF")])
980
;; Scalar square root on element 0, AVX encoding.  Element 0 is the
;; square root of operand 1's element 0; the remaining elements are
;; taken from operand 2 (vec_merge mask 1).
981 (define_insn "*avx_vmsqrt<mode>2"
982 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
983 (vec_merge:SSEMODEF2P
984 (sqrt:SSEMODEF2P
985 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
986 (match_operand:SSEMODEF2P 2 "register_operand" "x")
987 (const_int 1)))]
988 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
989 "vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
990 [(set_attr "type" "sse")
991 (set_attr "prefix" "vex")
992 (set_attr "mode" "<ssescalarmode>")])
993
;; Scalar square root on element 0, legacy SSE encoding.  Operand 2
;; (provider of the upper elements) is tied to the destination.
994 (define_insn "<sse>_vmsqrt<mode>2"
995 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
996 (vec_merge:SSEMODEF2P
997 (sqrt:SSEMODEF2P
998 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
999 (match_operand:SSEMODEF2P 2 "register_operand" "0")
1000 (const_int 1)))]
1001 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1002 "sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
1003 [(set_attr "type" "sse")
1004 (set_attr "atom_sse_attr" "sqrt")
1005 (set_attr "mode" "<ssescalarmode>")])
1006
;; Expander for V8SF reciprocal square root (UNSPEC_RSQRT).  Always
;; expands through ix86_emit_swsqrtsf with last argument 1 and ends with
;; DONE, so the RTL template above is never emitted as-is.
1007 (define_expand "rsqrtv8sf2"
1008 [(set (match_operand:V8SF 0 "register_operand" "")
1009 (unspec:V8SF
1010 [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
1011 "TARGET_AVX && TARGET_SSE_MATH"
1012 {
1013 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
1014 DONE;
1015 })
1016
;; 256-bit packed single-precision reciprocal square root, modelled as
;; UNSPEC_RSQRT.  Emits vrsqrtps; AVX only.
1017 (define_insn "avx_rsqrtv8sf2"
1018 [(set (match_operand:V8SF 0 "register_operand" "=x")
1019 (unspec:V8SF
1020 [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1021 "TARGET_AVX"
1022 "vrsqrtps\t{%1, %0|%0, %1}"
1023 [(set_attr "type" "sse")
1024 (set_attr "prefix" "vex")
1025 (set_attr "mode" "V8SF")])
1026
;; Expander for V4SF reciprocal square root (UNSPEC_RSQRT).  Always
;; expands through ix86_emit_swsqrtsf with last argument 1 and ends with
;; DONE.  Enabled only under TARGET_SSE_MATH.
1027 (define_expand "rsqrtv4sf2"
1028 [(set (match_operand:V4SF 0 "register_operand" "")
1029 (unspec:V4SF
1030 [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
1031 "TARGET_SSE_MATH"
1032 {
1033 ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
1034 DONE;
1035 })
1036
;; 128-bit packed single-precision reciprocal square root, modelled as
;; UNSPEC_RSQRT.  "%v" plus prefix "maybe_vex" covers both encodings.
1037 (define_insn "sse_rsqrtv4sf2"
1038 [(set (match_operand:V4SF 0 "register_operand" "=x")
1039 (unspec:V4SF
1040 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1041 "TARGET_SSE"
1042 "%vrsqrtps\t{%1, %0|%0, %1}"
1043 [(set_attr "type" "sse")
1044 (set_attr "prefix" "maybe_vex")
1045 (set_attr "mode" "V4SF")])
1046
;; Scalar single-precision reciprocal square root, AVX encoding.
;; Element 0 comes from UNSPEC_RSQRT of operand 1; the other elements
;; are copied from operand 2 (vec_merge mask 1).
1047 (define_insn "*avx_vmrsqrtv4sf2"
1048 [(set (match_operand:V4SF 0 "register_operand" "=x")
1049 (vec_merge:V4SF
1050 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1051 UNSPEC_RSQRT)
1052 (match_operand:V4SF 2 "register_operand" "x")
1053 (const_int 1)))]
1054 "TARGET_AVX"
1055 "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
1056 [(set_attr "type" "sse")
1057 (set_attr "prefix" "vex")
1058 (set_attr "mode" "SF")])
1059
;; Scalar single-precision reciprocal square root, legacy SSE encoding.
;; Operand 2 (provider of the upper elements) is tied to the destination.
1060 (define_insn "sse_vmrsqrtv4sf2"
1061 [(set (match_operand:V4SF 0 "register_operand" "=x")
1062 (vec_merge:V4SF
1063 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1064 UNSPEC_RSQRT)
1065 (match_operand:V4SF 2 "register_operand" "0")
1066 (const_int 1)))]
1067 "TARGET_SSE"
1068 "rsqrtss\t{%1, %0|%0, %1}"
1069 [(set_attr "type" "sse")
1070 (set_attr "mode" "SF")])
1071
1072 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1073 ;; isn't really correct, as those rtl operators aren't defined when
1074 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
1075
;; Expander for 256-bit FP min/max.  Without -ffinite-math-only operand 1
;; is first forced into a register; the operands are then normalized by
;; ix86_fixup_binary_operands_no_copy before the smaxmin RTL is emitted.
1076 (define_expand "<code><mode>3"
1077 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1078 (smaxmin:AVX256MODEF2P
1079 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1080 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1081 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1082 {
1083 if (!flag_finite_math_only)
1084 operands[1] = force_reg (<MODE>mode, operands[1]);
1085 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
1086 })
1087
;; Expander for 128-bit FP min/max.  Without -ffinite-math-only operand 1
;; is first forced into a register; the operands are then normalized by
;; ix86_fixup_binary_operands_no_copy before the smaxmin RTL is emitted.
1088 (define_expand "<code><mode>3"
1089 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1090 (smaxmin:SSEMODEF2P
1091 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1092 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1093 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1094 {
1095 if (!flag_finite_math_only)
1096 operands[1] = force_reg (<MODE>mode, operands[1]);
1097 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
1098 })
1099
;; Packed FP min/max, AVX encoding, valid only under
;; -ffinite-math-only.  Because NaNs are excluded the operands are
;; marked commutative ("%x"), and ix86_binary_operator_ok must accept
;; the operand combination.
1100 (define_insn "*avx_<code><mode>3_finite"
1101 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1102 (smaxmin:AVXMODEF2P
1103 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1104 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1105 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1106 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1107 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1108 [(set_attr "type" "sseadd")
1109 (set_attr "prefix" "vex")
1110 (set_attr "mode" "<MODE>")])
1111
;; Packed FP min/max, legacy SSE encoding, valid only under
;; -ffinite-math-only.  Operands are commutative ("%0"); operand 1 is
;; tied to the destination.
1112 (define_insn "*<code><mode>3_finite"
1113 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1114 (smaxmin:SSEMODEF2P
1115 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1116 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1117 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
1118 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1119 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1120 [(set_attr "type" "sseadd")
1121 (set_attr "mode" "<MODE>")])
1122
;; Packed FP min/max, AVX encoding, for the case where the _finite
;; variant does not apply.  With NaNs or signed zeros possible the
;; hardware min/max are order-sensitive, so operand 1 must be a register
;; and must NOT be marked commutative: letting the register allocator
;; swap the inputs could change the result.  This mirrors the legacy
;; SSE pattern "*<code><mode>3", which uses register_operand "0".
1123 (define_insn "*avx_<code><mode>3"
1124 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1125 (smaxmin:AVXMODEF2P
1126 (match_operand:AVXMODEF2P 1 "register_operand" "x")
1127 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1128 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1129 "v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1130 [(set_attr "type" "sseadd")
1131 (set_attr "prefix" "vex")
1132 (set_attr "mode" "<avxvecmode>")])
1133
;; Packed FP min/max, legacy SSE encoding, without the finite-math
;; restriction.  Operand 1 is a register tied to the destination and is
;; deliberately not marked commutative.
1134 (define_insn "*<code><mode>3"
1135 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1136 (smaxmin:SSEMODEF2P
1137 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1138 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1139 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1140 "<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}"
1141 [(set_attr "type" "sseadd")
1142 (set_attr "mode" "<MODE>")])
1143
;; Scalar FP min/max on element 0, AVX encoding.  Element 0 is the
;; min/max of the two operands; the remaining elements come from
;; operand 1 (vec_merge mask 1).  The "type" attribute is "sseadd",
;; matching the legacy "<sse>_vm<code><mode>3" pattern and every other
;; min/max pattern in this file, so both encodings are scheduled alike.
1144 (define_insn "*avx_vm<code><mode>3"
1145 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1146 (vec_merge:SSEMODEF2P
1147 (smaxmin:SSEMODEF2P
1148 (match_operand:SSEMODEF2P 1 "register_operand" "x")
1149 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1150 (match_dup 1)
1151 (const_int 1)))]
1152 "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
1153 "v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1154 [(set_attr "type" "sseadd")
1155 (set_attr "prefix" "vex")
1156 (set_attr "mode" "<ssescalarmode>")])
1157
;; Scalar FP min/max on element 0, legacy SSE encoding.  Operand 1 is
;; tied to the destination, so the upper elements stay in place.
1158 (define_insn "<sse>_vm<code><mode>3"
1159 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1160 (vec_merge:SSEMODEF2P
1161 (smaxmin:SSEMODEF2P
1162 (match_operand:SSEMODEF2P 1 "register_operand" "0")
1163 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1164 (match_dup 1)
1165 (const_int 1)))]
1166 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1167 "<maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1168 [(set_attr "type" "sseadd")
1169 (set_attr "mode" "<ssescalarmode>")])
1170
1171 ;; These versions of the min/max patterns implement exactly the operations
1172 ;; min = (op1 < op2 ? op1 : op2)
1173 ;; max = (!(op1 < op2) ? op1 : op2)
1174 ;; Their operands are not commutative, and thus they may be used in the
1175 ;; presence of -0.0 and NaN.
1176
;; Order-sensitive packed FP minimum, AVX encoding.  Modelled as
;; UNSPEC_IEEE_MIN rather than smin so the operands are never treated
;; as interchangeable.
1177 (define_insn "*avx_ieee_smin<mode>3"
1178 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1179 (unspec:AVXMODEF2P
1180 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1181 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1182 UNSPEC_IEEE_MIN))]
1183 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1184 "vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1185 [(set_attr "type" "sseadd")
1186 (set_attr "prefix" "vex")
1187 (set_attr "mode" "<avxvecmode>")])
1188
;; Order-sensitive packed FP maximum, AVX encoding.  Modelled as
;; UNSPEC_IEEE_MAX rather than smax so the operands are never treated
;; as interchangeable.
1189 (define_insn "*avx_ieee_smax<mode>3"
1190 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1191 (unspec:AVXMODEF2P
1192 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1193 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1194 UNSPEC_IEEE_MAX))]
1195 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1196 "vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1197 [(set_attr "type" "sseadd")
1198 (set_attr "prefix" "vex")
1199 (set_attr "mode" "<avxvecmode>")])
1200
;; Order-sensitive packed FP minimum, legacy SSE encoding
;; (UNSPEC_IEEE_MIN).  Operand 1 is tied to the destination.
1201 (define_insn "*ieee_smin<mode>3"
1202 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1203 (unspec:SSEMODEF2P
1204 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1205 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1206 UNSPEC_IEEE_MIN))]
1207 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1208 "min<ssemodesuffix>\t{%2, %0|%0, %2}"
1209 [(set_attr "type" "sseadd")
1210 (set_attr "mode" "<MODE>")])
1211
;; Order-sensitive packed FP maximum, legacy SSE encoding
;; (UNSPEC_IEEE_MAX).  Operand 1 is tied to the destination.
1212 (define_insn "*ieee_smax<mode>3"
1213 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1214 (unspec:SSEMODEF2P
1215 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1216 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1217 UNSPEC_IEEE_MAX))]
1218 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1219 "max<ssemodesuffix>\t{%2, %0|%0, %2}"
1220 [(set_attr "type" "sseadd")
1221 (set_attr "mode" "<MODE>")])
1222
;; 256-bit single-precision alternating add/subtract (vaddsubps).
;; vec_merge mask 170 (binary 10101010) selects the sum for the
;; odd-numbered elements and the difference for the even-numbered ones.
1223 (define_insn "avx_addsubv8sf3"
1224 [(set (match_operand:V8SF 0 "register_operand" "=x")
1225 (vec_merge:V8SF
1226 (plus:V8SF
1227 (match_operand:V8SF 1 "register_operand" "x")
1228 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1229 (minus:V8SF (match_dup 1) (match_dup 2))
1230 (const_int 170)))]
1231 "TARGET_AVX"
1232 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1233 [(set_attr "type" "sseadd")
1234 (set_attr "prefix" "vex")
1235 (set_attr "mode" "V8SF")])
1236
;; 256-bit double-precision alternating add/subtract (vaddsubpd).
;; vec_merge mask 10 (binary 1010) selects the sum for elements 1 and 3
;; and the difference for elements 0 and 2.
1237 (define_insn "avx_addsubv4df3"
1238 [(set (match_operand:V4DF 0 "register_operand" "=x")
1239 (vec_merge:V4DF
1240 (plus:V4DF
1241 (match_operand:V4DF 1 "register_operand" "x")
1242 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1243 (minus:V4DF (match_dup 1) (match_dup 2))
1244 (const_int 10)))]
1245 "TARGET_AVX"
1246 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1247 [(set_attr "type" "sseadd")
1248 (set_attr "prefix" "vex")
1249 (set_attr "mode" "V4DF")])
1250
;; 128-bit single-precision alternating add/subtract, AVX encoding.
;; vec_merge mask 10 (binary 1010) selects the sum for elements 1 and 3
;; and the difference for elements 0 and 2.
1251 (define_insn "*avx_addsubv4sf3"
1252 [(set (match_operand:V4SF 0 "register_operand" "=x")
1253 (vec_merge:V4SF
1254 (plus:V4SF
1255 (match_operand:V4SF 1 "register_operand" "x")
1256 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1257 (minus:V4SF (match_dup 1) (match_dup 2))
1258 (const_int 10)))]
1259 "TARGET_AVX"
1260 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1261 [(set_attr "type" "sseadd")
1262 (set_attr "prefix" "vex")
1263 (set_attr "mode" "V4SF")])
1264
;; 128-bit single-precision alternating add/subtract, SSE3 encoding.
;; Same RTL as the AVX variant with operand 1 tied to the destination.
;; prefix_rep is set to 1 for this instruction's length computation.
1265 (define_insn "sse3_addsubv4sf3"
1266 [(set (match_operand:V4SF 0 "register_operand" "=x")
1267 (vec_merge:V4SF
1268 (plus:V4SF
1269 (match_operand:V4SF 1 "register_operand" "0")
1270 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1271 (minus:V4SF (match_dup 1) (match_dup 2))
1272 (const_int 10)))]
1273 "TARGET_SSE3"
1274 "addsubps\t{%2, %0|%0, %2}"
1275 [(set_attr "type" "sseadd")
1276 (set_attr "prefix_rep" "1")
1277 (set_attr "mode" "V4SF")])
1278
;; 128-bit double-precision alternating add/subtract, AVX encoding.
;; vec_merge mask 2 selects the sum for element 1 and the difference
;; for element 0.
1279 (define_insn "*avx_addsubv2df3"
1280 [(set (match_operand:V2DF 0 "register_operand" "=x")
1281 (vec_merge:V2DF
1282 (plus:V2DF
1283 (match_operand:V2DF 1 "register_operand" "x")
1284 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1285 (minus:V2DF (match_dup 1) (match_dup 2))
1286 (const_int 2)))]
1287 "TARGET_AVX"
1288 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1289 [(set_attr "type" "sseadd")
1290 (set_attr "prefix" "vex")
1291 (set_attr "mode" "V2DF")])
1292
;; 128-bit double-precision alternating add/subtract, SSE3 encoding.
;; Same RTL as the AVX variant with operand 1 tied to the destination.
1293 (define_insn "sse3_addsubv2df3"
1294 [(set (match_operand:V2DF 0 "register_operand" "=x")
1295 (vec_merge:V2DF
1296 (plus:V2DF
1297 (match_operand:V2DF 1 "register_operand" "0")
1298 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1299 (minus:V2DF (match_dup 1) (match_dup 2))
1300 (const_int 2)))]
1301 "TARGET_SSE3"
1302 "addsubpd\t{%2, %0|%0, %2}"
1303 [(set_attr "type" "sseadd")
1304 (set_attr "atom_unit" "complex")
1305 (set_attr "mode" "V2DF")])
1306
;; 256-bit double-precision horizontal add/subtract (vhaddpd/vhsubpd).
;; The instruction operates on each 128-bit lane independently, so the
;; result is
;;   { op1[0] OP op1[1], op2[0] OP op2[1],     <- low lane
;;     op1[2] OP op1[3], op2[2] OP op2[3] }    <- high lane
;; The RTL below interleaves operand 1 and operand 2 per lane
;; accordingly; describing it as { op1 pairs, op2 pairs } would not
;; match what the hardware computes.
1307 (define_insn "avx_h<plusminus_insn>v4df3"
1308 [(set (match_operand:V4DF 0 "register_operand" "=x")
1309 (vec_concat:V4DF
1310 (vec_concat:V2DF
1311 (plusminus:DF
1312 (vec_select:DF
1313 (match_operand:V4DF 1 "register_operand" "x")
1314 (parallel [(const_int 0)]))
1315 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1316 (plusminus:DF
1317 (vec_select:DF
1318 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1319 (parallel [(const_int 0)]))
1320 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1321 (vec_concat:V2DF
1322 (plusminus:DF
1323 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1324 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1325 (plusminus:DF
1326 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1327 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1328 "TARGET_AVX"
1329 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1330 [(set_attr "type" "sseadd")
1331 (set_attr "prefix" "vex")
1332 (set_attr "mode" "V4DF")])
1333
;; 256-bit single-precision horizontal add/subtract (vhaddps/vhsubps).
;; The RTL is built per 128-bit half: the low V4SF holds the pairwise
;; results of operand 1 elements 0-3 followed by those of operand 2
;; elements 0-3; the high V4SF does the same for elements 4-7.
1334 (define_insn "avx_h<plusminus_insn>v8sf3"
1335 [(set (match_operand:V8SF 0 "register_operand" "=x")
1336 (vec_concat:V8SF
1337 (vec_concat:V4SF
1338 (vec_concat:V2SF
1339 (plusminus:SF
1340 (vec_select:SF
1341 (match_operand:V8SF 1 "register_operand" "x")
1342 (parallel [(const_int 0)]))
1343 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1344 (plusminus:SF
1345 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1346 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1347 (vec_concat:V2SF
1348 (plusminus:SF
1349 (vec_select:SF
1350 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1351 (parallel [(const_int 0)]))
1352 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1353 (plusminus:SF
1354 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1355 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1356 (vec_concat:V4SF
1357 (vec_concat:V2SF
1358 (plusminus:SF
1359 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1360 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1361 (plusminus:SF
1362 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1363 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1364 (vec_concat:V2SF
1365 (plusminus:SF
1366 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1367 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1368 (plusminus:SF
1369 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1370 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1371 "TARGET_AVX"
1372 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1373 [(set_attr "type" "sseadd")
1374 (set_attr "prefix" "vex")
1375 (set_attr "mode" "V8SF")])
1376
;; 128-bit single-precision horizontal add/subtract, AVX encoding.
;; Result: the two pairwise results of operand 1 (elements 0/1, 2/3)
;; followed by the two pairwise results of operand 2.
1377 (define_insn "*avx_h<plusminus_insn>v4sf3"
1378 [(set (match_operand:V4SF 0 "register_operand" "=x")
1379 (vec_concat:V4SF
1380 (vec_concat:V2SF
1381 (plusminus:SF
1382 (vec_select:SF
1383 (match_operand:V4SF 1 "register_operand" "x")
1384 (parallel [(const_int 0)]))
1385 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1386 (plusminus:SF
1387 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1388 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1389 (vec_concat:V2SF
1390 (plusminus:SF
1391 (vec_select:SF
1392 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1393 (parallel [(const_int 0)]))
1394 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1395 (plusminus:SF
1396 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1397 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1398 "TARGET_AVX"
1399 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1400 [(set_attr "type" "sseadd")
1401 (set_attr "prefix" "vex")
1402 (set_attr "mode" "V4SF")])
1403
;; 128-bit single-precision horizontal add/subtract, SSE3 encoding.
;; Same RTL as the AVX variant with operand 1 tied to the destination.
;; The reduc_splus_v4sf expander uses the "hadd" instance of this
;; pattern via gen_sse3_haddv4sf3.
1404 (define_insn "sse3_h<plusminus_insn>v4sf3"
1405 [(set (match_operand:V4SF 0 "register_operand" "=x")
1406 (vec_concat:V4SF
1407 (vec_concat:V2SF
1408 (plusminus:SF
1409 (vec_select:SF
1410 (match_operand:V4SF 1 "register_operand" "0")
1411 (parallel [(const_int 0)]))
1412 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1413 (plusminus:SF
1414 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1415 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1416 (vec_concat:V2SF
1417 (plusminus:SF
1418 (vec_select:SF
1419 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1420 (parallel [(const_int 0)]))
1421 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1422 (plusminus:SF
1423 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1424 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1425 "TARGET_SSE3"
1426 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1427 [(set_attr "type" "sseadd")
1428 (set_attr "atom_unit" "complex")
1429 (set_attr "prefix_rep" "1")
1430 (set_attr "mode" "V4SF")])
1431
;; 128-bit double-precision horizontal add/subtract, AVX encoding.
;; Result element 0 combines the two elements of operand 1; element 1
;; combines the two elements of operand 2.
1432 (define_insn "*avx_h<plusminus_insn>v2df3"
1433 [(set (match_operand:V2DF 0 "register_operand" "=x")
1434 (vec_concat:V2DF
1435 (plusminus:DF
1436 (vec_select:DF
1437 (match_operand:V2DF 1 "register_operand" "x")
1438 (parallel [(const_int 0)]))
1439 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1440 (plusminus:DF
1441 (vec_select:DF
1442 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1443 (parallel [(const_int 0)]))
1444 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1445 "TARGET_AVX"
1446 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1447 [(set_attr "type" "sseadd")
1448 (set_attr "prefix" "vex")
1449 (set_attr "mode" "V2DF")])
1450
;; 128-bit double-precision horizontal add/subtract, SSE3 encoding.
;; Same RTL as the AVX variant with operand 1 tied to the destination.
;; The reduc_splus_v2df expander uses the "hadd" instance of this
;; pattern via gen_sse3_haddv2df3.
1451 (define_insn "sse3_h<plusminus_insn>v2df3"
1452 [(set (match_operand:V2DF 0 "register_operand" "=x")
1453 (vec_concat:V2DF
1454 (plusminus:DF
1455 (vec_select:DF
1456 (match_operand:V2DF 1 "register_operand" "0")
1457 (parallel [(const_int 0)]))
1458 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1459 (plusminus:DF
1460 (vec_select:DF
1461 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1462 (parallel [(const_int 0)]))
1463 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1464 "TARGET_SSE3"
1465 "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
1466 [(set_attr "type" "sseadd")
1467 (set_attr "mode" "V2DF")])
1468
;; Sum reduction of a V8SF vector (operand 1) into operand 0.
;; Sequence: two horizontal adds of the value with itself, then a
;; vperm2f128 with selector 1 of that result, then a vector add of the
;; permuted and unpermuted values.
;; NOTE(review): selector 1 presumably swaps the 128-bit halves so the
;; final add combines the two per-lane sums -- confirm against the
;; vperm2f128 pattern, which is outside this chunk.
1469 (define_expand "reduc_splus_v8sf"
1470 [(match_operand:V8SF 0 "register_operand" "")
1471 (match_operand:V8SF 1 "register_operand" "")]
1472 "TARGET_AVX"
1473 {
1474 rtx tmp = gen_reg_rtx (V8SFmode);
1475 rtx tmp2 = gen_reg_rtx (V8SFmode);
1476 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1477 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1478 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1479 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
1480 DONE;
1481 })
1482
;; Sum reduction of a V4SF vector (operand 1) into operand 0.
;; With SSE3, two successive horizontal adds are emitted; otherwise the
;; work is delegated to ix86_expand_reduc_v4sf with gen_addv4sf3 as the
;; combining operation.
1483 (define_expand "reduc_splus_v4sf"
1484 [(match_operand:V4SF 0 "register_operand" "")
1485 (match_operand:V4SF 1 "register_operand" "")]
1486 "TARGET_SSE"
1487 {
1488 if (TARGET_SSE3)
1489 {
1490 rtx tmp = gen_reg_rtx (V4SFmode);
1491 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1492 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1493 }
1494 else
1495 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
1496 DONE;
1497 })
1498
;; Sum reduction of a V4DF vector (operand 1) into operand 0.
;; Sequence: one horizontal add of the value with itself, a vperm2f128
;; with selector 1 of that result, then a vector add of the two.
;; NOTE(review): selector 1 presumably swaps the 128-bit halves --
;; confirm against the vperm2f128 pattern, which is outside this chunk.
1499 (define_expand "reduc_splus_v4df"
1500 [(match_operand:V4DF 0 "register_operand" "")
1501 (match_operand:V4DF 1 "register_operand" "")]
1502 "TARGET_AVX"
1503 {
1504 rtx tmp = gen_reg_rtx (V4DFmode);
1505 rtx tmp2 = gen_reg_rtx (V4DFmode);
1506 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1507 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1508 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
1509 DONE;
1510 })
1511
;; Sum reduction of a V2DF vector: a single SSE3 horizontal add of
;; operand 1 with itself, written to operand 0.
1512 (define_expand "reduc_splus_v2df"
1513 [(match_operand:V2DF 0 "register_operand" "")
1514 (match_operand:V2DF 1 "register_operand" "")]
1515 "TARGET_SSE3"
1516 {
1517 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1518 DONE;
1519 })
1520
;; Maximum reduction of a V4SF vector: delegated to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining operation.
1521 (define_expand "reduc_smax_v4sf"
1522 [(match_operand:V4SF 0 "register_operand" "")
1523 (match_operand:V4SF 1 "register_operand" "")]
1524 "TARGET_SSE"
1525 {
1526 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
1527 DONE;
1528 })
1529
;; Minimum reduction of a V4SF vector: delegated to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining operation.
1530 (define_expand "reduc_smin_v4sf"
1531 [(match_operand:V4SF 0 "register_operand" "")
1532 (match_operand:V4SF 1 "register_operand" "")]
1533 "TARGET_SSE"
1534 {
1535 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1536 DONE;
1537 })
1538
1539 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1540 ;;
1541 ;; Parallel floating point comparisons
1542 ;;
1543 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1544
;; Packed FP compare with an explicit immediate predicate (operand 3,
;; range 0..31), modelled as UNSPEC_PCMP.  The immediate occupies one
;; byte of the encoding (length_immediate 1).
1545 (define_insn "avx_cmp<ssemodesuffix><mode>3"
1546 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1547 (unspec:AVXMODEF2P
1548 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1549 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1550 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1551 UNSPEC_PCMP))]
1552 "TARGET_AVX"
1553 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1554 [(set_attr "type" "ssecmp")
1555 (set_attr "length_immediate" "1")
1556 (set_attr "prefix" "vex")
1557 (set_attr "mode" "<MODE>")])
1558
;; Scalar FP compare on element 0 with an explicit immediate predicate
;; (operand 3, range 0..31), modelled as UNSPEC_PCMP.  Element 0 of the
;; result is the comparison mask; the remaining elements come from
;; operand 1 (vec_merge mask 1).  Operand 0 is an output and therefore
;; carries the "=x" constraint like every other insn pattern here; an
;; empty constraint string on a define_insn output gives the register
;; allocator no register class for the destination.
1559 (define_insn "avx_cmp<ssescalarmodesuffix><mode>3"
1560 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1561 (vec_merge:SSEMODEF2P
1562 (unspec:SSEMODEF2P
1563 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1564 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1565 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1566 UNSPEC_PCMP)
1567 (match_dup 1)
1568 (const_int 1)))]
1569 "TARGET_AVX"
1570 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1571 [(set_attr "type" "ssecmp")
1572 (set_attr "length_immediate" "1")
1573 (set_attr "prefix" "vex")
1574 (set_attr "mode" "<ssescalarmode>")])
1575
1576 ;; We don't promote 128-bit vector compare intrinsics, but the vectorizer
1577 ;; may generate 256-bit vector compare instructions.
;; Packed FP compare producing a mask, AVX encoding.  The comparison is
;; an RTL operator accepted by avx_comparison_float_operator; "%D3" in
;; the template prints it as part of the mnemonic.
1578 (define_insn "*avx_maskcmp<mode>3"
1579 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1580 (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1581 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1582 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1583 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1584 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1585 [(set_attr "type" "ssecmp")
1586 (set_attr "prefix" "vex")
1587 (set_attr "length_immediate" "1")
1588 (set_attr "mode" "<avxvecmode>")])
1589
;; FP compare producing a mask, legacy SSE encoding.  Iterates over
;; SSEMODEF4 and accepts both scalar (SSE_FLOAT_MODE_P) and vector
;; (SSE_VEC_FLOAT_MODE_P) modes; disabled when TARGET_XOP is set.
;; Operand 1 is tied to the destination.
1590 (define_insn "<sse>_maskcmp<mode>3"
1591 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1592 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1593 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1594 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1595 "!TARGET_XOP
1596 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1597 "cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}"
1598 [(set_attr "type" "ssecmp")
1599 (set_attr "length_immediate" "1")
1600 (set_attr "mode" "<MODE>")])
1601
;; Scalar FP compare on element 0 producing a mask, AVX encoding.
;; Element 0 holds the comparison result; the remaining elements come
;; from operand 1 (vec_merge mask 1).  The comparison predicate printed
;; by "%D3" is encoded as a one-byte immediate, so length_immediate is
;; set to 1 exactly as in the other maskcmp patterns of this file.
1602 (define_insn "*avx_vmmaskcmp<mode>3"
1603 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1604 (vec_merge:SSEMODEF2P
1605 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1606 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1607 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1608 (match_dup 1)
1609 (const_int 1)))]
1610 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1611 "vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1612 [(set_attr "type" "ssecmp")
1613 (set_attr "prefix" "vex")
(set_attr "length_immediate" "1")
1614 (set_attr "mode" "<ssescalarmode>")])
1615
;; Scalar FP compare on element 0 producing a mask, legacy SSE
;; encoding.  Operand 1 is tied to the destination, so the upper
;; elements stay in place.
1616 (define_insn "<sse>_vmmaskcmp<mode>3"
1617 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1618 (vec_merge:SSEMODEF2P
1619 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1620 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1621 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1622 (match_dup 1)
1623 (const_int 1)))]
1624 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1625 "cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
1626 [(set_attr "type" "ssecmp")
1627 (set_attr "length_immediate" "1")
1628 (set_attr "mode" "<ssescalarmode>")])
1629
;; Scalar compare of element 0 of two vectors, setting FLAGS_REG in
;; CCFP mode (comiss/comisd).  prefix_rep is forced to 0, and
;; prefix_data16 is 1 only for the DF variant.
1630 (define_insn "<sse>_comi"
1631 [(set (reg:CCFP FLAGS_REG)
1632 (compare:CCFP
1633 (vec_select:MODEF
1634 (match_operand:<ssevecmode> 0 "register_operand" "x")
1635 (parallel [(const_int 0)]))
1636 (vec_select:MODEF
1637 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1638 (parallel [(const_int 0)]))))]
1639 "SSE_FLOAT_MODE_P (<MODE>mode)"
1640 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1641 [(set_attr "type" "ssecomi")
1642 (set_attr "prefix" "maybe_vex")
1643 (set_attr "prefix_rep" "0")
1644 (set (attr "prefix_data16")
1645 (if_then_else (eq_attr "mode" "DF")
1646 (const_string "1")
1647 (const_string "0")))
1648 (set_attr "mode" "<MODE>")])
1649
;; Scalar compare of element 0 of two vectors, setting FLAGS_REG in
;; CCFPU mode (ucomiss/ucomisd).  Attributes mirror the comi pattern:
;; prefix_rep 0, prefix_data16 set only for DF.
1650 (define_insn "<sse>_ucomi"
1651 [(set (reg:CCFPU FLAGS_REG)
1652 (compare:CCFPU
1653 (vec_select:MODEF
1654 (match_operand:<ssevecmode> 0 "register_operand" "x")
1655 (parallel [(const_int 0)]))
1656 (vec_select:MODEF
1657 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1658 (parallel [(const_int 0)]))))]
1659 "SSE_FLOAT_MODE_P (<MODE>mode)"
1660 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1661 [(set_attr "type" "ssecomi")
1662 (set_attr "prefix" "maybe_vex")
1663 (set_attr "prefix_rep" "0")
1664 (set (attr "prefix_data16")
1665 (if_then_else (eq_attr "mode" "DF")
1666 (const_string "1")
1667 (const_string "0")))
1668 (set_attr "mode" "<MODE>")])
1669
;; Vector conditional select for FP vectors: operand 0 receives
;; operand 1 or operand 2 per element depending on the comparison
;; (operator 3 applied to operands 4 and 5).  All the work is done by
;; ix86_expand_fp_vcond, which is asserted to succeed.
1670 (define_expand "vcond<mode>"
1671 [(set (match_operand:AVXMODEF2P 0 "register_operand" "")
1672 (if_then_else:AVXMODEF2P
1673 (match_operator 3 ""
1674 [(match_operand:AVXMODEF2P 4 "nonimmediate_operand" "")
1675 (match_operand:AVXMODEF2P 5 "nonimmediate_operand" "")])
1676 (match_operand:AVXMODEF2P 1 "general_operand" "")
1677 (match_operand:AVXMODEF2P 2 "general_operand" "")))]
1678 "(SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1679 || AVX_VEC_FLOAT_MODE_P (<MODE>mode))"
1680 {
1681 bool ok = ix86_expand_fp_vcond (operands);
1682 gcc_assert (ok);
1683 DONE;
1684 })
1685
1686 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1687 ;;
1688 ;; Parallel floating point logical operations
1689 ;;
1690 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1691
;; Packed FP and-not, AVX encoding: operand 0 = (NOT operand 1) AND
;; operand 2.  Operand 1 (the complemented input) must be a register.
1692 (define_insn "avx_andnot<mode>3"
1693 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1694 (and:AVXMODEF2P
1695 (not:AVXMODEF2P
1696 (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1697 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1698 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1699 "vandn<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1700 [(set_attr "type" "sselog")
1701 (set_attr "prefix" "vex")
1702 (set_attr "mode" "<avxvecmode>")])
1703
;; Packed FP and-not, legacy SSE encoding: operand 0 = (NOT operand 1)
;; AND operand 2, with operand 1 tied to the destination.
1704 (define_insn "<sse>_andnot<mode>3"
1705 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1706 (and:SSEMODEF2P
1707 (not:SSEMODEF2P
1708 (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1709 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1710 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1711 "andn<ssemodesuffix>\t{%2, %0|%0, %2}"
1712 [(set_attr "type" "sselog")
1713 (set_attr "mode" "<MODE>")])
1714
;; Expander for 256-bit FP bitwise logic (the any_logic code iterator).
;; Operands are normalized by ix86_fixup_binary_operands_no_copy before
;; the RTL template is emitted.
1715 (define_expand "<code><mode>3"
1716 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1717 (any_logic:AVX256MODEF2P
1718 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1719 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1720 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1721 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1722
;; Packed FP bitwise logic, AVX encoding.  Operands are commutative
;; ("%x").  When TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set the "ps"
;; form is emitted regardless of the element type; otherwise the suffix
;; follows the mode.
1723 (define_insn "*avx_<code><mode>3"
1724 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1725 (any_logic:AVXMODEF2P
1726 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1727 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1728 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1729 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1730 {
1731 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
1732 return "v<logic>ps\t{%2, %1, %0|%0, %1, %2}";
1733 else
1734 return "v<logic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1735 }
1736 [(set_attr "type" "sselog")
1737 (set_attr "prefix" "vex")
1738 (set_attr "mode" "<avxvecmode>")])
1739
;; Expander for 128-bit FP bitwise logic (the any_logic code iterator).
;; Operands are normalized by ix86_fixup_binary_operands_no_copy before
;; the RTL template is emitted.
1740 (define_expand "<code><mode>3"
1741 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1742 (any_logic:SSEMODEF2P
1743 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1744 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1745 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1746 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1747
;; Two-operand SSE form of the packed float logic operations in any_logic.
;; Operand 1 is commutative and tied to the destination ("%0"); operand 2
;; may be a register or memory.  The "ps" mnemonic is chosen whenever
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set; otherwise the suffix is
;; taken from <ssemodesuffix>.
1748 (define_insn "*<code><mode>3"
1749 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1750 (any_logic:SSEMODEF2P
1751 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1752 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1753 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1754 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1755 {
1756 return
1757 (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
1758 ? "<logic>ps\t{%2, %0|%0, %2}"
1759 : "<logic><ssemodesuffix>\t{%2, %0|%0, %2}");
1760 }
1761 [(set_attr "type" "sselog")
1762 (set_attr "mode" "<MODE>")])
1763
;; copysign expander for VEC_FLOAT_MODE, built from three logic sets:
;;   operand 4 = ~operand 3 & operand 1
;;   operand 5 =  operand 3 & operand 2
;;   operand 0 =  operand 4 | operand 5
;; Operand 3 is the mask returned by ix86_build_signbit_mask; operands 4 and
;; 5 are fresh pseudo registers created in the preparation code.
1764 (define_expand "copysign<mode>3"
1765 [(set (match_dup 4)
1766 (and:VEC_FLOAT_MODE
1767 (not:VEC_FLOAT_MODE (match_dup 3))
1768 (match_operand:VEC_FLOAT_MODE 1 "nonimmediate_operand" "")))
1769 (set (match_dup 5)
1770 (and:VEC_FLOAT_MODE (match_dup 3)
1771 (match_operand:VEC_FLOAT_MODE 2 "nonimmediate_operand" "")))
1772 (set (match_operand:VEC_FLOAT_MODE 0 "register_operand" "")
1773 (ior:VEC_FLOAT_MODE (match_dup 4) (match_dup 5)))]
1774 ""
1775 {
1776 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1777
1778 operands[4] = gen_reg_rtx (<MODE>mode);
1779 operands[5] = gen_reg_rtx (<MODE>mode);
1780 })
1781
1782 ;; Also define scalar versions. These are used for abs, neg, and
1783 ;; conditional move. Using subregs into vector modes causes register
1784 ;; allocation lossage. These patterns do not allow memory operands
1785 ;; because the native instructions read the full 128-bits.
1786
;; Scalar (MODEF) and-not for AVX: operand 0 = (~operand 1) & operand 2,
;; emitted as the three-operand packed vandnp<ssemodefsuffix>.  All three
;; operands are "x" registers; there is no memory alternative.
1787 (define_insn "*avx_andnot<mode>3"
1788 [(set (match_operand:MODEF 0 "register_operand" "=x")
1789 (and:MODEF
1790 (not:MODEF
1791 (match_operand:MODEF 1 "register_operand" "x"))
1792 (match_operand:MODEF 2 "register_operand" "x")))]
1793 "AVX_FLOAT_MODE_P (<MODE>mode)"
1794 "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1795 [(set_attr "type" "sselog")
1796 (set_attr "prefix" "vex")
1797 (set_attr "mode" "<ssevecmode>")])
1798
;; Scalar (MODEF) and-not for SSE: operand 0 = (~operand 1) & operand 2,
;; emitted as the two-operand packed andnp<ssemodefsuffix>.  Operand 1 is
;; tied to the destination ("0"); operand 2 must be an "x" register.
1799 (define_insn "*andnot<mode>3"
1800 [(set (match_operand:MODEF 0 "register_operand" "=x")
1801 (and:MODEF
1802 (not:MODEF
1803 (match_operand:MODEF 1 "register_operand" "0"))
1804 (match_operand:MODEF 2 "register_operand" "x")))]
1805 "SSE_FLOAT_MODE_P (<MODE>mode)"
1806 "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
1807 [(set_attr "type" "sselog")
1808 (set_attr "mode" "<ssevecmode>")])
1809
;; Scalar (MODEF) logic operations of any_logic for AVX, register operands
;; only, emitted with the three-operand packed mnemonic.  The "ps" form is
;; chosen whenever TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set; otherwise
;; the suffix is "p" followed by <ssemodefsuffix>.
1810 (define_insn "*avx_<code><mode>3"
1811 [(set (match_operand:MODEF 0 "register_operand" "=x")
1812 (any_logic:MODEF
1813 (match_operand:MODEF 1 "register_operand" "x")
1814 (match_operand:MODEF 2 "register_operand" "x")))]
1815 "AVX_FLOAT_MODE_P (<MODE>mode)"
1816 {
1817 return
1818 (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
1819 ? "v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
1820 : "v<logic>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}");
1821 }
1822 [(set_attr "type" "sselog")
1823 (set_attr "prefix" "vex")
1824 (set_attr "mode" "<ssevecmode>")])
1825
;; Scalar (MODEF) logic operations of any_logic for SSE, register operands
;; only, with operand 1 tied to the destination ("0").  The "ps" form is
;; chosen whenever TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set; otherwise
;; the suffix is "p" followed by <ssemodefsuffix>.
1826 (define_insn "*<code><mode>3"
1827 [(set (match_operand:MODEF 0 "register_operand" "=x")
1828 (any_logic:MODEF
1829 (match_operand:MODEF 1 "register_operand" "0")
1830 (match_operand:MODEF 2 "register_operand" "x")))]
1831 "SSE_FLOAT_MODE_P (<MODE>mode)"
1832 {
1833 return
1834 (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
1835 ? "<logic>ps\t{%2, %0|%0, %2}"
1836 : "<logic>p<ssemodefsuffix>\t{%2, %0|%0, %2}");
1837 }
1838 [(set_attr "type" "sselog")
1839 (set_attr "mode" "<ssevecmode>")])
1840
1841 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1842 ;;
1843 ;; FMA4 floating point multiply/accumulate instructions. This
1844 ;; includes the scalar version of the instructions as well as the
1845 ;; vector.
1846 ;;
1847 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1848
1849 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1850 ;; combine to generate a multiply/add with two memory references. We then
1851 ;; split this insn by first loading the destination register from one of the
1852 ;; memory operands. If we don't manage to split the insn, reload will
1853 ;; generate the appropriate moves. This is needed because combine
1854 ;; has already folded one of the memory references into both the multiply and
1855 ;; add insns, and it can't generate a new pseudo. I.e.:
1856 ;; (set (reg1) (mem (addr1)))
1857 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1858 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1859
1860 ;; Intrinsic FMA operations.
1861
1862 ;; The standard name for fma is only available with SSE math enabled.
;; operand 0 = fma (operand 1, operand 2, operand 3) over FMAMODE.
;; Enabled for either TARGET_FMA or TARGET_FMA4, and additionally requires
;; TARGET_SSE_MATH.  There is no preparation code; the RTL is emitted as is.
1863 (define_expand "fma<mode>4"
1864 [(set (match_operand:FMAMODE 0 "register_operand")
1865 (fma:FMAMODE
1866 (match_operand:FMAMODE 1 "nonimmediate_operand")
1867 (match_operand:FMAMODE 2 "nonimmediate_operand")
1868 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1869 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH"
1870 "")
1871
1872 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; Same fma RTL as the fma<mode>4 expander, but the condition only asks for
;; TARGET_FMA or TARGET_FMA4.  There is no preparation code.
1873 (define_expand "fma4i_fmadd_<mode>"
1874 [(set (match_operand:FMAMODE 0 "register_operand")
1875 (fma:FMAMODE
1876 (match_operand:FMAMODE 1 "nonimmediate_operand")
1877 (match_operand:FMAMODE 2 "nonimmediate_operand")
1878 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1879 "TARGET_FMA || TARGET_FMA4"
1880 "")
1881
;; FMA4 multiply-add: operand 0 = operand 1 * operand 2 + operand 3, with a
;; destination register separate from all three sources.
;; Alternative 0 allows operand 3 in memory, alternative 1 allows operand 2
;; in memory, so at most one source is a memory operand.
1882 (define_insn "*fma4i_fmadd_<mode>"
1883 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1884 (fma:FMAMODE
1885 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1886 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1887 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1888 "TARGET_FMA4"
1889 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1890 [(set_attr "type" "ssemuladd")
1891 (set_attr "mode" "<MODE>")])
1892
;; FMA4 multiply-subtract: operand 0 = operand 1 * operand 2 - operand 3,
;; expressed as fma with a negated addend.  Operand alternatives are the
;; same as for the fmadd pattern (memory in operand 3 or in operand 2).
1893 (define_insn "*fma4i_fmsub_<mode>"
1894 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1895 (fma:FMAMODE
1896 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1897 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1898 (neg:FMAMODE
1899 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1900 "TARGET_FMA4"
1901 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1902 [(set_attr "type" "ssemuladd")
1903 (set_attr "mode" "<MODE>")])
1904
;; FMA4 negated multiply-add: operand 0 = -(operand 1) * operand 2 + operand 3,
;; expressed as fma with a negated first multiplicand.
1905 (define_insn "*fma4i_fnmadd_<mode>"
1906 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1907 (fma:FMAMODE
1908 (neg:FMAMODE
1909 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1910 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1911 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1912 "TARGET_FMA4"
1913 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1914 [(set_attr "type" "ssemuladd")
1915 (set_attr "mode" "<MODE>")])
1916
;; FMA4 negated multiply-subtract:
;; operand 0 = -(operand 1) * operand 2 - operand 3, expressed as fma with
;; both the first multiplicand and the addend negated.
1917 (define_insn "*fma4i_fnmsub_<mode>"
1918 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1919 (fma:FMAMODE
1920 (neg:FMAMODE
1921 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1922 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1923 (neg:FMAMODE
1924 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1925 "TARGET_FMA4"
1926 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1927 [(set_attr "type" "ssemuladd")
1928 (set_attr "mode" "<MODE>")])
1929
1930 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1931 ;; entire destination register, with the high-order elements zeroed.
1932
;; Expander for the scalar-in-vector FMA4 multiply-add.  The fma result is
;; vec_merged (mask const_int 1) with operand 4, which the preparation code
;; sets to the all-zero constant of the vector mode.
1933 (define_expand "fma4i_vmfmadd_<mode>"
1934 [(set (match_operand:SSEMODEF2P 0 "register_operand")
1935 (vec_merge:SSEMODEF2P
1936 (fma:SSEMODEF2P
1937 (match_operand:SSEMODEF2P 1 "nonimmediate_operand")
1938 (match_operand:SSEMODEF2P 2 "nonimmediate_operand")
1939 (match_operand:SSEMODEF2P 3 "nonimmediate_operand"))
1940 (match_dup 4)
1941 (const_int 1)))]
1942 "TARGET_FMA4"
1943 {
1944 operands[4] = CONST0_RTX (<MODE>mode);
1945 })
1946
;; Scalar-in-vector FMA4 multiply-add.  Operand 4 must be a zero constant
;; (const0_operand) and is merged with the fma result under mask
;; (const_int 1).  The mnemonic suffix comes from <ssescalarmodesuffix>.
1947 (define_insn "*fma4i_vmfmadd_<mode>"
1948 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1949 (vec_merge:SSEMODEF2P
1950 (fma:SSEMODEF2P
1951 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1952 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1953 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1954 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1955 (const_int 1)))]
1956 "TARGET_FMA4"
1957 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1958 [(set_attr "type" "ssemuladd")
1959 (set_attr "mode" "<MODE>")])
1960
;; Scalar-in-vector FMA4 multiply-subtract: fma with a negated addend,
;; merged with the zero constant in operand 4 under mask (const_int 1).
1961 (define_insn "*fma4i_vmfmsub_<mode>"
1962 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1963 (vec_merge:SSEMODEF2P
1964 (fma:SSEMODEF2P
1965 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1966 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1967 (neg:SSEMODEF2P
1968 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")))
1969 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1970 (const_int 1)))]
1971 "TARGET_FMA4"
1972 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1973 [(set_attr "type" "ssemuladd")
1974 (set_attr "mode" "<MODE>")])
1975
;; Scalar-in-vector FMA4 negated multiply-add: fma with a negated first
;; multiplicand, merged with the zero constant in operand 4 under mask
;; (const_int 1).
1976 (define_insn "*fma4i_vmfnmadd_<mode>"
1977 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1978 (vec_merge:SSEMODEF2P
1979 (fma:SSEMODEF2P
1980 (neg:SSEMODEF2P
1981 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1982 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1983 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1984 (match_operand:SSEMODEF2P 4 "const0_operand" "")
1985 (const_int 1)))]
1986 "TARGET_FMA4"
1987 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1988 [(set_attr "type" "ssemuladd")
1989 (set_attr "mode" "<MODE>")])
1990
;; Scalar-in-vector FMA4 negated multiply-subtract: fma with both the first
;; multiplicand and the addend negated, merged with the zero constant in
;; operand 4 under mask (const_int 1).
1991 (define_insn "*fma4i_vmfnmsub_<mode>"
1992 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1993 (vec_merge:SSEMODEF2P
1994 (fma:SSEMODEF2P
1995 (neg:SSEMODEF2P
1996 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1997 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
1998 (neg:SSEMODEF2P
1999 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")))
2000 (match_operand:SSEMODEF2P 4 "const0_operand" "")
2001 (const_int 1)))]
2002 "TARGET_FMA4"
2003 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2004 [(set_attr "type" "ssemuladd")
2005 (set_attr "mode" "<MODE>")])
2006
2007 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2008 ;;
2009 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
2010 ;;
2011 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2012
2013 ;; It would be possible to represent these without the UNSPEC as
2014 ;;
2015 ;; (vec_merge
2016 ;; (fma op1 op2 op3)
2017 ;; (fma op1 op2 (neg op3))
2018 ;; (merge-const))
2019 ;;
2020 ;; But this doesn't seem useful in practice.
2021
;; Expander for the multiply add/sub operation, represented as
;; UNSPEC_FMADDSUB over operands 1..3.  Enabled for TARGET_FMA or
;; TARGET_FMA4; there is no preparation code.
2022 (define_expand "fmaddsub_<mode>"
2023 [(set (match_operand:AVXMODEF2P 0 "register_operand")
2024 (unspec:AVXMODEF2P
2025 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand")
2026 (match_operand:AVXMODEF2P 2 "nonimmediate_operand")
2027 (match_operand:AVXMODEF2P 3 "nonimmediate_operand")]
2028 UNSPEC_FMADDSUB))]
2029 "TARGET_FMA || TARGET_FMA4"
2030 "")
2031
;; FMA4 multiply add/sub (UNSPEC_FMADDSUB of operands 1..3) with a separate
;; destination register.  Alternative 0 allows operand 3 in memory,
;; alternative 1 allows operand 2 in memory.
;; The pattern is instantiated for every mode of AVXMODEF2P, so the mnemonic
;; suffix is taken from <ssemodesuffix> (as the FMA3 fmaddsub pattern does
;; for the same iterator) instead of a fixed "ps".
2032 (define_insn "*fma4_fmaddsub_<mode>"
2033 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
2034 (unspec:AVXMODEF2P
2035 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
2036 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m")
2037 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x")]
2038 UNSPEC_FMADDSUB))]
2039 "TARGET_FMA4"
2040 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2041 [(set_attr "type" "ssemuladd")
2042 (set_attr "mode" "<MODE>")])
2043
;; FMA4 multiply sub/add: UNSPEC_FMADDSUB with operand 3 negated.
;; Alternative 0 allows operand 3 in memory, alternative 1 allows operand 2
;; in memory.
;; The pattern is instantiated for every mode of AVXMODEF2P, so the mnemonic
;; suffix is taken from <ssemodesuffix> (as the FMA3 fmsubadd pattern does
;; for the same iterator) instead of a fixed "ps".
2044 (define_insn "*fma4_fmsubadd_<mode>"
2045 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
2046 (unspec:AVXMODEF2P
2047 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
2048 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m")
2049 (neg:AVXMODEF2P
2050 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x"))]
2051 UNSPEC_FMADDSUB))]
2052 "TARGET_FMA4"
2053 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2054 [(set_attr "type" "ssemuladd")
2055 (set_attr "mode" "<MODE>")])
2056
2057 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2058 ;;
2059 ;; FMA3 floating point multiply/accumulate instructions.
2060 ;;
2061 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2062
;; FMA3 multiply-add: operand 0 = operand 1 * operand 2 + operand 3.
;; Every alternative ties the destination to one of the inputs:
;;   alt 0: dest = operand 1, operand 2 may be memory -> 132 form
;;   alt 1: dest = operand 1, operand 3 may be memory -> 213 form
;;   alt 2: dest = operand 3, operand 2 may be memory -> 231 form
;; FMA3 only defines the 132, 213 and 231 orderings; there is no "312"
;; mnemonic, so alternative 1 uses 213, matching the fmaddsub patterns.
2063 (define_insn "*fma_fmadd_<mode>"
2064 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2065 (fma:FMAMODE
2066 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
2067 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2068 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2069 "TARGET_FMA"
2070 "@
2071 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2072 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2073 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2074 [(set_attr "type" "ssemuladd")
2075 (set_attr "mode" "<MODE>")])
2076
;; FMA3 multiply-subtract: operand 0 = operand 1 * operand 2 - operand 3,
;; expressed as fma with a negated addend.
;;   alt 0: dest = operand 1, operand 2 may be memory -> 132 form
;;   alt 1: dest = operand 1, operand 3 may be memory -> 213 form
;;   alt 2: dest = operand 3, operand 2 may be memory -> 231 form
;; FMA3 only defines the 132, 213 and 231 orderings; there is no "312"
;; mnemonic, so alternative 1 uses 213.
2077 (define_insn "*fma_fmsub_<mode>"
2078 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2079 (fma:FMAMODE
2080 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
2081 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2082 (neg:FMAMODE
2083 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2084 "TARGET_FMA"
2085 "@
2086 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2087 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2088 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2089 [(set_attr "type" "ssemuladd")
2090 (set_attr "mode" "<MODE>")])
2091
;; FMA3 negated multiply-add: operand 0 = -(operand 1) * operand 2 + operand 3.
;; Named fnmadd to match the emitted mnemonic and to stop sharing a name with
;; the plain multiply-add pattern (the leading "*" keeps the name internal).
;;   alt 0: dest = operand 1, operand 2 may be memory -> 132 form
;;   alt 1: dest = operand 1, operand 3 may be memory -> 213 form
;;   alt 2: dest = operand 3, operand 2 may be memory -> 231 form
;; FMA3 only defines the 132, 213 and 231 orderings; there is no "312"
;; mnemonic, so alternative 1 uses 213.
2092 (define_insn "*fma_fnmadd_<mode>"
2093 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2094 (fma:FMAMODE
2095 (neg:FMAMODE
2096 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2097 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2098 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2099 "TARGET_FMA"
2100 "@
2101 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2102 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2103 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2104 [(set_attr "type" "ssemuladd")
2105 (set_attr "mode" "<MODE>")])
2106
;; FMA3 negated multiply-subtract:
;; operand 0 = -(operand 1) * operand 2 - operand 3.
;; Named fnmsub to match the emitted mnemonic and to stop sharing a name with
;; the plain multiply-subtract pattern (the leading "*" keeps the name
;; internal).
;;   alt 0: dest = operand 1, operand 2 may be memory -> 132 form
;;   alt 1: dest = operand 1, operand 3 may be memory -> 213 form
;;   alt 2: dest = operand 3, operand 2 may be memory -> 231 form
;; FMA3 only defines the 132, 213 and 231 orderings; there is no "312"
;; mnemonic, so alternative 1 uses 213.
2107 (define_insn "*fma_fnmsub_<mode>"
2108 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2109 (fma:FMAMODE
2110 (neg:FMAMODE
2111 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2112 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2113 (neg:FMAMODE
2114 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2115 "TARGET_FMA"
2116 "@
2117 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2118 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2119 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2120 [(set_attr "type" "ssemuladd")
2121 (set_attr "mode" "<MODE>")])
2122
;; FMA3 multiply add/sub (UNSPEC_FMADDSUB of operands 1..3).
;;   alt 0: dest = operand 1, operand 2 may be memory -> 132 form
;;   alt 1: dest = operand 1, operand 3 may be memory -> 213 form
;;   alt 2: dest = operand 3, operand 2 may be memory -> 231 form
2123 (define_insn "*fma_fmaddsub_<mode>"
2124 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x,x")
2125 (unspec:AVXMODEF2P
2126 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%0, 0,x")
2127 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm, x,xm")
2128 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" " x,xm,0")]
2129 UNSPEC_FMADDSUB))]
2130 "TARGET_FMA"
2131 "@
2132 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2133 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2134 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2135 [(set_attr "type" "ssemuladd")
2136 (set_attr "mode" "<MODE>")])
2137
;; FMA3 multiply sub/add: UNSPEC_FMADDSUB with operand 3 negated.
;;   alt 0: dest = operand 1, operand 2 may be memory -> 132 form
;;   alt 1: dest = operand 1, operand 3 may be memory -> 213 form
;;   alt 2: dest = operand 3, operand 2 may be memory -> 231 form
2138 (define_insn "*fma_fmsubadd_<mode>"
2139 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x,x")
2140 (unspec:AVXMODEF2P
2141 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%0, 0,x")
2142 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm, x,xm")
2143 (neg:AVXMODEF2P
2144 (match_operand:AVXMODEF2P 3 "nonimmediate_operand" " x,xm,0"))]
2145 UNSPEC_FMADDSUB))]
2146 "TARGET_FMA"
2147 "@
2148 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2149 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2150 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2151 [(set_attr "type" "ssemuladd")
2152 (set_attr "mode" "<MODE>")])
2153
2154 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2155 ;;
2156 ;; Non-intrinsic versions, matched when fused-multiply-add is allowed.
2157 ;;
2158 ;; ??? If fused-madd were a generic flag, combine could do this without
2159 ;; needing splitters here in the backend. Irritatingly, combine won't
2160 ;; recognize many of these with mere splits, since only 3 or more insns
2161 ;; are allowed to split during combine. Thankfully, there's always a
2162 ;; split_all_insns pass that runs before reload.
2163 ;;
2164 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2165
;; Floating multiply and add.
;; Matches (op1 * op2) + op3 while reload has neither started nor finished,
;; provided SSE math, fused-madd and one of FMA/FMA4 are enabled, and
;; splits it unconditionally ("&& 1") into the equivalent fma rtx.  The
;; output code is gcc_unreachable (): the insn is not meant to reach final
;; output unsplit.
2166 (define_insn_and_split "*split_fma"
2167 [(set (match_operand:FMAMODE 0 "register_operand")
2168 (plus:FMAMODE
2169 (mult:FMAMODE
2170 (match_operand:FMAMODE 1 "nonimmediate_operand")
2171 (match_operand:FMAMODE 2 "nonimmediate_operand"))
2172 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
2173 "TARGET_SSE_MATH && TARGET_FUSED_MADD
2174 && (TARGET_FMA || TARGET_FMA4)
2175 && !(reload_in_progress || reload_completed)"
2176 { gcc_unreachable (); }
2177 "&& 1"
2178 [(set (match_dup 0)
2179 (fma:FMAMODE
2180 (match_dup 1)
2181 (match_dup 2)
2182 (match_dup 3)))]
2183 "")
2184
2185 ;; Floating multiply and subtract.
;; Matches (op1 * op2) - op3 before reload and splits it unconditionally
;; ("&& 1") into fma (op1, op2, -op3).  The output code is
;; gcc_unreachable (): the insn is not meant to reach final output unsplit.
2186 (define_insn_and_split "*split_fms"
2187 [(set (match_operand:FMAMODE 0 "register_operand")
2188 (minus:FMAMODE
2189 (mult:FMAMODE
2190 (match_operand:FMAMODE 1 "nonimmediate_operand")
2191 (match_operand:FMAMODE 2 "nonimmediate_operand"))
2192 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
2193 "TARGET_SSE_MATH && TARGET_FUSED_MADD
2194 && (TARGET_FMA || TARGET_FMA4)
2195 && !(reload_in_progress || reload_completed)"
2196 { gcc_unreachable (); }
2197 "&& 1"
2198 [(set (match_dup 0)
2199 (fma:FMAMODE
2200 (match_dup 1)
2201 (match_dup 2)
2202 (neg:FMAMODE (match_dup 3))))]
2203 "")
2204
2205 ;; Floating point negative multiply and add.
2206 ;; Recognize (-a * b + c) via the canonical form: c - (a * b).
;; Matches op3 - (op1 * op2) before reload and splits it unconditionally
;; ("&& 1") into fma (-op1, op2, op3).  Note that operand 3 appears first
;; in the matched RTL.  The output code is gcc_unreachable (): the insn is
;; not meant to reach final output unsplit.
2207 (define_insn_and_split "*split_fnma"
2208 [(set (match_operand:FMAMODE 0 "register_operand")
2209 (minus:FMAMODE
2210 (match_operand:FMAMODE 3 "nonimmediate_operand")
2211 (mult:FMAMODE
2212 (match_operand:FMAMODE 1 "nonimmediate_operand")
2213 (match_operand:FMAMODE 2 "nonimmediate_operand"))))]
2214 "TARGET_SSE_MATH && TARGET_FUSED_MADD
2215 && (TARGET_FMA || TARGET_FMA4)
2216 && !(reload_in_progress || reload_completed)"
2217 { gcc_unreachable (); }
2218 "&& 1"
2219 [(set (match_dup 0)
2220 (fma:FMAMODE
2221 (neg:FMAMODE (match_dup 1))
2222 (match_dup 2)
2223 (match_dup 3)))]
2224 "")
2225
2226 ;; Floating point negative multiply and subtract.
2227 ;; Recognize (-a * b - c) via the canonical form: (-a * b) - c.
;; The matched RTL is minus (mult (neg op1) op2) op3; it is split
;; unconditionally ("&& 1") into fma (-op1, op2, -op3).  The output code
;; is gcc_unreachable (): the insn is not meant to reach final output
;; unsplit.
2228 (define_insn_and_split "*split_fnms"
2229 [(set (match_operand:FMAMODE 0 "register_operand")
2230 (minus:FMAMODE
2231 (mult:FMAMODE
2232 (neg:FMAMODE
2233 (match_operand:FMAMODE 1 "nonimmediate_operand"))
2234 (match_operand:FMAMODE 2 "nonimmediate_operand"))
2235 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
2236 "TARGET_SSE_MATH && TARGET_FUSED_MADD
2237 && (TARGET_FMA || TARGET_FMA4)
2238 && !(reload_in_progress || reload_completed)"
2239 { gcc_unreachable (); }
2240 "&& 1"
2241 [(set (match_dup 0)
2242 (fma:FMAMODE
2243 (neg:FMAMODE (match_dup 1))
2244 (match_dup 2)
2245 (neg:FMAMODE (match_dup 3))))]
2246 "")
2247
2248 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2249 ;;
2250 ;; Parallel single-precision floating point conversion operations
2251 ;;
2252 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2253
;; cvtpi2ps: convert the V2SI operand 2 ("y" register or memory) to V2SF and
;; merge it into operand 1 under mask (const_int 3).  Operand 1 is tied to
;; the destination.
2254 (define_insn "sse_cvtpi2ps"
2255 [(set (match_operand:V4SF 0 "register_operand" "=x")
2256 (vec_merge:V4SF
2257 (vec_duplicate:V4SF
2258 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2259 (match_operand:V4SF 1 "register_operand" "0")
2260 (const_int 3)))]
2261 "TARGET_SSE"
2262 "cvtpi2ps\t{%2, %0|%0, %2}"
2263 [(set_attr "type" "ssecvt")
2264 (set_attr "mode" "V4SF")])
2265
;; cvtps2pi: non-truncating conversion (UNSPEC_FIX_NOTRUNC) of the V4SF
;; operand 1, of which elements 0 and 1 are selected into a V2SI "y"
;; destination.
2266 (define_insn "sse_cvtps2pi"
2267 [(set (match_operand:V2SI 0 "register_operand" "=y")
2268 (vec_select:V2SI
2269 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2270 UNSPEC_FIX_NOTRUNC)
2271 (parallel [(const_int 0) (const_int 1)])))]
2272 "TARGET_SSE"
2273 "cvtps2pi\t{%1, %0|%0, %1}"
2274 [(set_attr "type" "ssecvt")
2275 (set_attr "unit" "mmx")
2276 (set_attr "mode" "DI")])
2277
;; cvttps2pi: truncating (fix) conversion of the V4SF operand 1, of which
;; elements 0 and 1 are selected into a V2SI "y" destination.
2278 (define_insn "sse_cvttps2pi"
2279 [(set (match_operand:V2SI 0 "register_operand" "=y")
2280 (vec_select:V2SI
2281 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2282 (parallel [(const_int 0) (const_int 1)])))]
2283 "TARGET_SSE"
2284 "cvttps2pi\t{%1, %0|%0, %1}"
2285 [(set_attr "type" "ssecvt")
2286 (set_attr "unit" "mmx")
2287 (set_attr "prefix_rep" "0")
2288 (set_attr "mode" "SF")])
2289
;; vcvtsi2ss: convert the SImode operand 2 (register or memory) to SF and
;; merge it into operand 1 under mask (const_int 1).  Three-operand form:
;; operand 1 is an independent "x" register.
2290 (define_insn "*avx_cvtsi2ss"
2291 [(set (match_operand:V4SF 0 "register_operand" "=x")
2292 (vec_merge:V4SF
2293 (vec_duplicate:V4SF
2294 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2295 (match_operand:V4SF 1 "register_operand" "x")
2296 (const_int 1)))]
2297 "TARGET_AVX"
2298 "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2299 [(set_attr "type" "sseicvt")
2300 (set_attr "prefix" "vex")
2301 (set_attr "mode" "SF")])
2302
;; cvtsi2ss: convert the SImode operand 2 to SF and merge it into operand 1
;; (tied to the destination) under mask (const_int 1).  Register and memory
;; sources are separate alternatives so that the *_decode attributes can
;; differ per alternative.
2303 (define_insn "sse_cvtsi2ss"
2304 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2305 (vec_merge:V4SF
2306 (vec_duplicate:V4SF
2307 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2308 (match_operand:V4SF 1 "register_operand" "0,0")
2309 (const_int 1)))]
2310 "TARGET_SSE"
2311 "cvtsi2ss\t{%2, %0|%0, %2}"
2312 [(set_attr "type" "sseicvt")
2313 (set_attr "athlon_decode" "vector,double")
2314 (set_attr "amdfam10_decode" "vector,double")
2315 (set_attr "bdver1_decode" "double,direct")
2316 (set_attr "mode" "SF")])
2317
;; vcvtsi2ssq: DImode-source variant of the AVX cvtsi2ss pattern; requires
;; TARGET_64BIT in addition to TARGET_AVX.
2318 (define_insn "*avx_cvtsi2ssq"
2319 [(set (match_operand:V4SF 0 "register_operand" "=x")
2320 (vec_merge:V4SF
2321 (vec_duplicate:V4SF
2322 (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2323 (match_operand:V4SF 1 "register_operand" "x")
2324 (const_int 1)))]
2325 "TARGET_AVX && TARGET_64BIT"
2326 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2327 [(set_attr "type" "sseicvt")
2328 (set_attr "length_vex" "4")
2329 (set_attr "prefix" "vex")
2330 (set_attr "mode" "SF")])
2331
;; cvtsi2ssq: convert the DImode operand 2 to SF and merge it into operand 1
;; (tied to the destination) under mask (const_int 1); 64-bit targets only.
;; Alternative 0 is the register source and alternative 1 the memory source,
;; matching the per-alternative *_decode attributes below and the "r,m"
;; split used by the other cvtsi2s* patterns in this file.
2332 (define_insn "sse_cvtsi2ssq"
2333 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2334 (vec_merge:V4SF
2335 (vec_duplicate:V4SF
2336 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2337 (match_operand:V4SF 1 "register_operand" "0,0")
2338 (const_int 1)))]
2339 "TARGET_SSE && TARGET_64BIT"
2340 "cvtsi2ssq\t{%2, %0|%0, %2}"
2341 [(set_attr "type" "sseicvt")
2342 (set_attr "prefix_rex" "1")
2343 (set_attr "athlon_decode" "vector,double")
2344 (set_attr "amdfam10_decode" "vector,double")
2345 (set_attr "bdver1_decode" "double,direct")
2346 (set_attr "mode" "SF")])
2347
;; cvtss2si: non-truncating conversion (UNSPEC_FIX_NOTRUNC) of element 0 of
;; the V4SF operand 1 to SImode.  Register and memory sources are separate
;; alternatives for the per-alternative decode attributes.
2348 (define_insn "sse_cvtss2si"
2349 [(set (match_operand:SI 0 "register_operand" "=r,r")
2350 (unspec:SI
2351 [(vec_select:SF
2352 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2353 (parallel [(const_int 0)]))]
2354 UNSPEC_FIX_NOTRUNC))]
2355 "TARGET_SSE"
2356 "%vcvtss2si\t{%1, %0|%0, %1}"
2357 [(set_attr "type" "sseicvt")
2358 (set_attr "athlon_decode" "double,vector")
2359 (set_attr "bdver1_decode" "double,double")
2360 (set_attr "prefix_rep" "1")
2361 (set_attr "prefix" "maybe_vex")
2362 (set_attr "mode" "SI")])
2363
;; Variant of sse_cvtss2si whose source is a scalar SFmode operand rather
;; than element 0 of a V4SF vector; same instruction template.
2364 (define_insn "sse_cvtss2si_2"
2365 [(set (match_operand:SI 0 "register_operand" "=r,r")
2366 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2367 UNSPEC_FIX_NOTRUNC))]
2368 "TARGET_SSE"
2369 "%vcvtss2si\t{%1, %0|%0, %1}"
2370 [(set_attr "type" "sseicvt")
2371 (set_attr "athlon_decode" "double,vector")
2372 (set_attr "amdfam10_decode" "double,double")
2373 (set_attr "bdver1_decode" "double,double")
2374 (set_attr "prefix_rep" "1")
2375 (set_attr "prefix" "maybe_vex")
2376 (set_attr "mode" "SI")])
2377
;; cvtss2si with a DImode result: non-truncating conversion
;; (UNSPEC_FIX_NOTRUNC) of element 0 of the V4SF operand 1; 64-bit targets
;; only.  The "{q}" in the template is an assembler-dialect alternative.
2378 (define_insn "sse_cvtss2siq"
2379 [(set (match_operand:DI 0 "register_operand" "=r,r")
2380 (unspec:DI
2381 [(vec_select:SF
2382 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2383 (parallel [(const_int 0)]))]
2384 UNSPEC_FIX_NOTRUNC))]
2385 "TARGET_SSE && TARGET_64BIT"
2386 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2387 [(set_attr "type" "sseicvt")
2388 (set_attr "athlon_decode" "double,vector")
2389 (set_attr "bdver1_decode" "double,double")
2390 (set_attr "prefix_rep" "1")
2391 (set_attr "prefix" "maybe_vex")
2392 (set_attr "mode" "DI")])
2393
;; Variant of sse_cvtss2siq whose source is a scalar SFmode operand rather
;; than element 0 of a V4SF vector; same instruction template.
2394 (define_insn "sse_cvtss2siq_2"
2395 [(set (match_operand:DI 0 "register_operand" "=r,r")
2396 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2397 UNSPEC_FIX_NOTRUNC))]
2398 "TARGET_SSE && TARGET_64BIT"
2399 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2400 [(set_attr "type" "sseicvt")
2401 (set_attr "athlon_decode" "double,vector")
2402 (set_attr "amdfam10_decode" "double,double")
2403 (set_attr "bdver1_decode" "double,double")
2404 (set_attr "prefix_rep" "1")
2405 (set_attr "prefix" "maybe_vex")
2406 (set_attr "mode" "DI")])
2407
;; cvttss2si: truncating (fix) conversion of element 0 of the V4SF
;; operand 1 to SImode.
2408 (define_insn "sse_cvttss2si"
2409 [(set (match_operand:SI 0 "register_operand" "=r,r")
2410 (fix:SI
2411 (vec_select:SF
2412 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2413 (parallel [(const_int 0)]))))]
2414 "TARGET_SSE"
2415 "%vcvttss2si\t{%1, %0|%0, %1}"
2416 [(set_attr "type" "sseicvt")
2417 (set_attr "athlon_decode" "double,vector")
2418 (set_attr "amdfam10_decode" "double,double")
2419 (set_attr "bdver1_decode" "double,double")
2420 (set_attr "prefix_rep" "1")
2421 (set_attr "prefix" "maybe_vex")
2422 (set_attr "mode" "SI")])
2423
;; cvttss2si with a DImode result: truncating (fix) conversion of element 0
;; of the V4SF operand 1; 64-bit targets only.
2424 (define_insn "sse_cvttss2siq"
2425 [(set (match_operand:DI 0 "register_operand" "=r,r")
2426 (fix:DI
2427 (vec_select:SF
2428 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2429 (parallel [(const_int 0)]))))]
2430 "TARGET_SSE && TARGET_64BIT"
2431 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2432 [(set_attr "type" "sseicvt")
2433 (set_attr "athlon_decode" "double,vector")
2434 (set_attr "amdfam10_decode" "double,double")
2435 (set_attr "bdver1_decode" "double,double")
2436 (set_attr "prefix_rep" "1")
2437 (set_attr "prefix" "maybe_vex")
2438 (set_attr "mode" "DI")])
2439
;; vcvtdq2ps: integer-to-float conversion (float rtx) for each mode of
;; AVXMODEDCVTDQ2PS; the source has the mode given by <avxcvtvecmode> and
;; may be a register or memory.
2440 (define_insn "avx_cvtdq2ps<avxmodesuffix>"
2441 [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
2442 (float:AVXMODEDCVTDQ2PS
2443 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2444 "TARGET_AVX"
2445 "vcvtdq2ps\t{%1, %0|%0, %1}"
2446 [(set_attr "type" "ssecvt")
2447 (set_attr "prefix" "vex")
2448 (set_attr "mode" "<avxvecmode>")])
2449
;; cvtdq2ps: convert the V4SI operand 1 (register or memory) to V4SF.
2450 (define_insn "sse2_cvtdq2ps"
2451 [(set (match_operand:V4SF 0 "register_operand" "=x")
2452 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2453 "TARGET_SSE2"
2454 "cvtdq2ps\t{%1, %0|%0, %1}"
2455 [(set_attr "type" "ssecvt")
2456 (set_attr "mode" "V4SF")])
2457
;; Unsigned V4SI -> V4SF conversion built from the signed conversion:
;;   operand 5 = float (operand 1)           ; signed conversion
;;   operand 6 = operand 5 < operand 3       ; compare against zero
;;   operand 7 = operand 6 & operand 4       ; select 2**32 where the compare held
;;   operand 0 = operand 5 + operand 7
;; Operand 3 is the zero vector and operand 4 a constant vector built from
;; 2**32 (1.0 scaled by 32 binary places); both are forced into registers.
;; Operands 5..7 are fresh V4SF pseudos, allocated in ascending order.
2458 (define_expand "sse2_cvtudq2ps"
2459 [(set (match_dup 5)
2460 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2461 (set (match_dup 6)
2462 (lt:V4SF (match_dup 5) (match_dup 3)))
2463 (set (match_dup 7)
2464 (and:V4SF (match_dup 6) (match_dup 4)))
2465 (set (match_operand:V4SF 0 "register_operand" "")
2466 (plus:V4SF (match_dup 5) (match_dup 7)))]
2467 "TARGET_SSE2"
2468 {
2469 REAL_VALUE_TYPE two_pow_32;
2470 rtx two_pow_32_sf;
2471
2472 real_ldexp (&two_pow_32, &dconst1, 32);
2473 two_pow_32_sf = const_double_from_real_value (two_pow_32, SFmode);
2474
2475 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2476 operands[4]
2477 = force_reg (V4SFmode, ix86_build_const_vector (V4SFmode, 1, two_pow_32_sf));
2478
2479 operands[5] = gen_reg_rtx (V4SFmode);
2480 operands[6] = gen_reg_rtx (V4SFmode);
2481 operands[7] = gen_reg_rtx (V4SFmode);
2482 })
2483
;; vcvtps2dq: non-truncating conversion (UNSPEC_FIX_NOTRUNC) for each mode
;; of AVXMODEDCVTPS2DQ; the source has the mode given by <avxcvtvecmode>.
2484 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2485 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2486 (unspec:AVXMODEDCVTPS2DQ
2487 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2488 UNSPEC_FIX_NOTRUNC))]
2489 "TARGET_AVX"
2490 "vcvtps2dq\t{%1, %0|%0, %1}"
2491 [(set_attr "type" "ssecvt")
2492 (set_attr "prefix" "vex")
2493 (set_attr "mode" "<avxvecmode>")])
2494
;; cvtps2dq: non-truncating conversion (UNSPEC_FIX_NOTRUNC) of the V4SF
;; operand 1 to V4SI.
2495 (define_insn "sse2_cvtps2dq"
2496 [(set (match_operand:V4SI 0 "register_operand" "=x")
2497 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2498 UNSPEC_FIX_NOTRUNC))]
2499 "TARGET_SSE2"
2500 "cvtps2dq\t{%1, %0|%0, %1}"
2501 [(set_attr "type" "ssecvt")
2502 (set_attr "prefix_data16" "1")
2503 (set_attr "mode" "TI")])
2504
;; vcvttps2dq: truncating (fix) conversion for each mode of
;; AVXMODEDCVTPS2DQ; the source has the mode given by <avxcvtvecmode>.
2505 (define_insn "avx_cvttps2dq<avxmodesuffix>"
2506 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2507 (fix:AVXMODEDCVTPS2DQ
2508 (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
2509 "TARGET_AVX"
2510 "vcvttps2dq\t{%1, %0|%0, %1}"
2511 [(set_attr "type" "ssecvt")
2512 (set_attr "prefix" "vex")
2513 (set_attr "mode" "<avxvecmode>")])
2514
;; cvttps2dq: truncating (fix) conversion of the V4SF operand 1 to V4SI.
;; The attributes select a rep prefix and explicitly no data16 prefix.
2515 (define_insn "sse2_cvttps2dq"
2516 [(set (match_operand:V4SI 0 "register_operand" "=x")
2517 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2518 "TARGET_SSE2"
2519 "cvttps2dq\t{%1, %0|%0, %1}"
2520 [(set_attr "type" "ssecvt")
2521 (set_attr "prefix_rep" "1")
2522 (set_attr "prefix_data16" "0")
2523 (set_attr "mode" "TI")])
2524
2525 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2526 ;;
2527 ;; Parallel double-precision floating point conversion operations
2528 ;;
2529 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2530
;; cvtpi2pd: convert the V2SI operand 1 to V2DF.  Alternative 0 takes a "y"
;; register and sets unit "mmx" and prefix_data16 "1"; alternative 1 takes
;; memory and keeps the default ("*") for both attributes.
2531 (define_insn "sse2_cvtpi2pd"
2532 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2533 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2534 "TARGET_SSE2"
2535 "cvtpi2pd\t{%1, %0|%0, %1}"
2536 [(set_attr "type" "ssecvt")
2537 (set_attr "unit" "mmx,*")
2538 (set_attr "prefix_data16" "1,*")
2539 (set_attr "mode" "V2DF")])
2540
;; cvtpd2pi: non-truncating conversion (UNSPEC_FIX_NOTRUNC) of the V2DF
;; operand 1 into a V2SI "y" destination.
2541 (define_insn "sse2_cvtpd2pi"
2542 [(set (match_operand:V2SI 0 "register_operand" "=y")
2543 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2544 UNSPEC_FIX_NOTRUNC))]
2545 "TARGET_SSE2"
2546 "cvtpd2pi\t{%1, %0|%0, %1}"
2547 [(set_attr "type" "ssecvt")
2548 (set_attr "unit" "mmx")
2549 (set_attr "prefix_data16" "1")
2550 (set_attr "mode" "DI")
2551 (set_attr "bdver1_decode" "double")])
2552
;; cvttpd2pi: truncating (fix) conversion of the V2DF operand 1 into a V2SI
;; "y" destination.
2553 (define_insn "sse2_cvttpd2pi"
2554 [(set (match_operand:V2SI 0 "register_operand" "=y")
2555 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2556 "TARGET_SSE2"
2557 "cvttpd2pi\t{%1, %0|%0, %1}"
2558 [(set_attr "type" "ssecvt")
2559 (set_attr "unit" "mmx")
2560 (set_attr "prefix_data16" "1")
2561 (set_attr "mode" "TI")
2562 (set_attr "bdver1_decode" "double")])
2563
;; vcvtsi2sd: convert the SImode operand 2 (register or memory) to DF and
;; merge it into operand 1 under mask (const_int 1).  Three-operand form:
;; operand 1 is an independent "x" register.
2564 (define_insn "*avx_cvtsi2sd"
2565 [(set (match_operand:V2DF 0 "register_operand" "=x")
2566 (vec_merge:V2DF
2567 (vec_duplicate:V2DF
2568 (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
2569 (match_operand:V2DF 1 "register_operand" "x")
2570 (const_int 1)))]
2571 "TARGET_AVX"
2572 "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2573 [(set_attr "type" "sseicvt")
2574 (set_attr "prefix" "vex")
2575 (set_attr "mode" "DF")])
2576
;; cvtsi2sd: convert the SImode operand 2 to DF and merge it into operand 1
;; (tied to the destination) under mask (const_int 1).  Register and memory
;; sources are separate alternatives so that the *_decode attributes can
;; differ per alternative.
2577 (define_insn "sse2_cvtsi2sd"
2578 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2579 (vec_merge:V2DF
2580 (vec_duplicate:V2DF
2581 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
2582 (match_operand:V2DF 1 "register_operand" "0,0")
2583 (const_int 1)))]
2584 "TARGET_SSE2"
2585 "cvtsi2sd\t{%2, %0|%0, %2}"
2586 [(set_attr "type" "sseicvt")
2587 (set_attr "mode" "DF")
2588 (set_attr "athlon_decode" "double,direct")
2589 (set_attr "amdfam10_decode" "vector,double")
2590 (set_attr "bdver1_decode" "double,direct")])
2591
;; vcvtsi2sdq: DImode-source variant of the AVX cvtsi2sd pattern; requires
;; TARGET_64BIT in addition to TARGET_AVX.
2592 (define_insn "*avx_cvtsi2sdq"
2593 [(set (match_operand:V2DF 0 "register_operand" "=x")
2594 (vec_merge:V2DF
2595 (vec_duplicate:V2DF
2596 (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
2597 (match_operand:V2DF 1 "register_operand" "x")
2598 (const_int 1)))]
2599 "TARGET_AVX && TARGET_64BIT"
2600 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2601 [(set_attr "type" "sseicvt")
2602 (set_attr "length_vex" "4")
2603 (set_attr "prefix" "vex")
2604 (set_attr "mode" "DF")])
2605
;; SSE2 form of the DImode -> DFmode scalar conversion; 64-bit targets only.
;; Operand 1 is tied to the destination.  Alternatives are split by source
;; kind (register / memory) so the decode attributes can be given per
;; alternative.
2606 (define_insn "sse2_cvtsi2sdq"
2607 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2608 (vec_merge:V2DF
2609 (vec_duplicate:V2DF
2610 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
2611 (match_operand:V2DF 1 "register_operand" "0,0")
2612 (const_int 1)))]
2613 "TARGET_SSE2 && TARGET_64BIT"
2614 "cvtsi2sdq\t{%2, %0|%0, %2}"
2615 [(set_attr "type" "sseicvt")
2616 (set_attr "prefix_rex" "1")
2617 (set_attr "mode" "DF")
2618 (set_attr "athlon_decode" "double,direct")
2619 (set_attr "amdfam10_decode" "vector,double")
2620 (set_attr "bdver1_decode" "double,direct")])
2621
;; Convert element 0 of a V2DF operand (register or memory) to an SImode
;; general register, expressed through UNSPEC_FIX_NOTRUNC.  The template uses
;; the %v prefix together with prefix "maybe_vex".
;; NOTE(review): unlike sse2_cvtsd2si_2 this pattern carries no
;; "amdfam10_decode" attribute; confirm that is deliberate.
2622 (define_insn "sse2_cvtsd2si"
2623 [(set (match_operand:SI 0 "register_operand" "=r,r")
2624 (unspec:SI
2625 [(vec_select:DF
2626 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2627 (parallel [(const_int 0)]))]
2628 UNSPEC_FIX_NOTRUNC))]
2629 "TARGET_SSE2"
2630 "%vcvtsd2si\t{%1, %0|%0, %1}"
2631 [(set_attr "type" "sseicvt")
2632 (set_attr "athlon_decode" "double,vector")
2633 (set_attr "bdver1_decode" "double,double")
2634 (set_attr "prefix_rep" "1")
2635 (set_attr "prefix" "maybe_vex")
2636 (set_attr "mode" "SI")])
2637
;; Variant of sse2_cvtsd2si whose source is a scalar DFmode operand rather
;; than element 0 selected out of a V2DF vector.  Same output template.
2638 (define_insn "sse2_cvtsd2si_2"
2639 [(set (match_operand:SI 0 "register_operand" "=r,r")
2640 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2641 UNSPEC_FIX_NOTRUNC))]
2642 "TARGET_SSE2"
2643 "%vcvtsd2si\t{%1, %0|%0, %1}"
2644 [(set_attr "type" "sseicvt")
2645 (set_attr "athlon_decode" "double,vector")
2646 (set_attr "amdfam10_decode" "double,double")
2647 (set_attr "bdver1_decode" "double,double")
2648 (set_attr "prefix_rep" "1")
2649 (set_attr "prefix" "maybe_vex")
2650 (set_attr "mode" "SI")])
2651
;; DImode-result counterpart of sse2_cvtsd2si: element 0 of a V2DF operand is
;; converted through UNSPEC_FIX_NOTRUNC into a 64-bit general register.
;; Enabled only when TARGET_64BIT.
;; NOTE(review): no "amdfam10_decode" attribute here, unlike
;; sse2_cvtsd2siq_2; confirm that is deliberate.
2652 (define_insn "sse2_cvtsd2siq"
2653 [(set (match_operand:DI 0 "register_operand" "=r,r")
2654 (unspec:DI
2655 [(vec_select:DF
2656 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2657 (parallel [(const_int 0)]))]
2658 UNSPEC_FIX_NOTRUNC))]
2659 "TARGET_SSE2 && TARGET_64BIT"
2660 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2661 [(set_attr "type" "sseicvt")
2662 (set_attr "athlon_decode" "double,vector")
2663 (set_attr "bdver1_decode" "double,double")
2664 (set_attr "prefix_rep" "1")
2665 (set_attr "prefix" "maybe_vex")
2666 (set_attr "mode" "DI")])
2667
;; Variant of sse2_cvtsd2siq taking a scalar DFmode source operand directly
;; instead of a vec_select from V2DF.  64-bit targets only.
2668 (define_insn "sse2_cvtsd2siq_2"
2669 [(set (match_operand:DI 0 "register_operand" "=r,r")
2670 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2671 UNSPEC_FIX_NOTRUNC))]
2672 "TARGET_SSE2 && TARGET_64BIT"
2673 "%vcvtsd2siq\t{%1, %0|%0, %1}"
2674 [(set_attr "type" "sseicvt")
2675 (set_attr "athlon_decode" "double,vector")
2676 (set_attr "amdfam10_decode" "double,double")
2677 (set_attr "bdver1_decode" "double,double")
2678 (set_attr "prefix_rep" "1")
2679 (set_attr "prefix" "maybe_vex")
2680 (set_attr "mode" "DI")])
2681
;; Convert element 0 of a V2DF operand to SImode using the RTL `fix' code
;; (contrast with sse2_cvtsd2si, which goes through UNSPEC_FIX_NOTRUNC).
;; Source is an "x" register or memory; result is a general register.
2682 (define_insn "sse2_cvttsd2si"
2683 [(set (match_operand:SI 0 "register_operand" "=r,r")
2684 (fix:SI
2685 (vec_select:DF
2686 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2687 (parallel [(const_int 0)]))))]
2688 "TARGET_SSE2"
2689 "%vcvttsd2si\t{%1, %0|%0, %1}"
2690 [(set_attr "type" "sseicvt")
2691 (set_attr "prefix_rep" "1")
2692 (set_attr "prefix" "maybe_vex")
2693 (set_attr "mode" "SI")
2694 (set_attr "athlon_decode" "double,vector")
2695 (set_attr "amdfam10_decode" "double,double")
2696 (set_attr "bdver1_decode" "double,double")])
2697
;; DImode-result counterpart of sse2_cvttsd2si: `fix' of element 0 of a V2DF
;; operand into a 64-bit general register.  Enabled only when TARGET_64BIT.
2698 (define_insn "sse2_cvttsd2siq"
2699 [(set (match_operand:DI 0 "register_operand" "=r,r")
2700 (fix:DI
2701 (vec_select:DF
2702 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2703 (parallel [(const_int 0)]))))]
2704 "TARGET_SSE2 && TARGET_64BIT"
2705 "%vcvttsd2siq\t{%1, %0|%0, %1}"
2706 [(set_attr "type" "sseicvt")
2707 (set_attr "prefix_rep" "1")
2708 (set_attr "prefix" "maybe_vex")
2709 (set_attr "mode" "DI")
2710 (set_attr "athlon_decode" "double,vector")
2711 (set_attr "amdfam10_decode" "double,double")
2712 (set_attr "bdver1_decode" "double,double")])
2713
;; AVX: convert all four SImode elements of a V4SI operand (register or
;; memory) to DFmode, producing a V4DF result.
2714 (define_insn "avx_cvtdq2pd256"
2715 [(set (match_operand:V4DF 0 "register_operand" "=x")
2716 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2717 "TARGET_AVX"
2718 "vcvtdq2pd\t{%1, %0|%0, %1}"
2719 [(set_attr "type" "ssecvt")
2720 (set_attr "prefix" "vex")
2721 (set_attr "mode" "V4DF")])
2722
;; Same conversion as avx_cvtdq2pd256, but matching a source that is
;; elements 0..3 selected out of a V8SI operand.  The operand is printed with
;; the %x modifier.
;; NOTE(review): %x presumably prints the 128-bit (xmm) view of the operand;
;; the modifier is defined outside this chunk -- confirm.
2723 (define_insn "*avx_cvtdq2pd256_2"
2724 [(set (match_operand:V4DF 0 "register_operand" "=x")
2725 (float:V4DF
2726 (vec_select:V4SI
2727 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2728 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
2729 "TARGET_AVX"
2730 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2731 [(set_attr "type" "ssecvt")
2732 (set_attr "prefix" "vex")
2733 (set_attr "mode" "V4DF")])
2734
;; Convert elements 0 and 1 of a V4SI operand (register or memory) to
;; DFmode, producing a V2DF result.  Elements 2 and 3 of the source are
;; not used.
2735 (define_insn "sse2_cvtdq2pd"
2736 [(set (match_operand:V2DF 0 "register_operand" "=x")
2737 (float:V2DF
2738 (vec_select:V2SI
2739 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2740 (parallel [(const_int 0) (const_int 1)]))))]
2741 "TARGET_SSE2"
2742 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2743 [(set_attr "type" "ssecvt")
2744 (set_attr "prefix" "maybe_vex")
2745 (set_attr "mode" "V2DF")])
2746
;; AVX: convert a V4DF operand to V4SI through UNSPEC_FIX_NOTRUNC.
;; NOTE(review): the "{y}" in the template looks like a suffix emitted only
;; in the first (AT&T) assembler dialect, to disambiguate the 256-bit source
;; size -- confirm.
2747 (define_insn "avx_cvtpd2dq256"
2748 [(set (match_operand:V4SI 0 "register_operand" "=x")
2749 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2750 UNSPEC_FIX_NOTRUNC))]
2751 "TARGET_AVX"
2752 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2753 [(set_attr "type" "ssecvt")
2754 (set_attr "prefix" "vex")
2755 (set_attr "mode" "OI")])
2756
;; Expander for the V2DF -> V4SI UNSPEC_FIX_NOTRUNC conversion.  The V2SI
;; conversion result forms the low half of the V4SI destination; operand 2,
;; created here as the V2SI zero constant, forms the high half.  The
;; generated RTL is matched by *sse2_cvtpd2dq.
2757 (define_expand "sse2_cvtpd2dq"
2758 [(set (match_operand:V4SI 0 "register_operand" "")
2759 (vec_concat:V4SI
2760 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2761 UNSPEC_FIX_NOTRUNC)
2762 (match_dup 2)))]
2763 "TARGET_SSE2"
2764 "operands[2] = CONST0_RTX (V2SImode);")
2765
;; Insn matching the RTL produced by the sse2_cvtpd2dq expander: low half is
;; the UNSPEC_FIX_NOTRUNC conversion of operand 1, high half must satisfy
;; const0_operand.  The C output fragment picks the "v"-prefixed mnemonic
;; (with an "{x}" suffix) when TARGET_AVX, otherwise the plain SSE2 mnemonic.
2766 (define_insn "*sse2_cvtpd2dq"
2767 [(set (match_operand:V4SI 0 "register_operand" "=x")
2768 (vec_concat:V4SI
2769 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2770 UNSPEC_FIX_NOTRUNC)
2771 (match_operand:V2SI 2 "const0_operand" "")))]
2772 "TARGET_SSE2"
2773 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2774 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2775 [(set_attr "type" "ssecvt")
2776 (set_attr "prefix_rep" "1")
2777 (set_attr "prefix_data16" "0")
2778 (set_attr "prefix" "maybe_vex")
2779 (set_attr "mode" "TI")
2780 (set_attr "amdfam10_decode" "double")
2781 (set_attr "bdver1_decode" "double")])
2782
;; AVX: convert a V4DF operand (register or memory) to V4SI using the RTL
;; `fix' code.  Counterpart of avx_cvtpd2dq256, which uses
;; UNSPEC_FIX_NOTRUNC instead.
2783 (define_insn "avx_cvttpd2dq256"
2784 [(set (match_operand:V4SI 0 "register_operand" "=x")
2785 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2786 "TARGET_AVX"
2787 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2788 [(set_attr "type" "ssecvt")
2789 (set_attr "prefix" "vex")
2790 (set_attr "mode" "OI")])
2791
;; Expander for the V2DF -> V4SI `fix' conversion.  The V2SI result is the
;; low half of the destination; operand 2 is set here to the V2SI zero
;; constant and supplies the high half.  Matched by *sse2_cvttpd2dq.
2792 (define_expand "sse2_cvttpd2dq"
2793 [(set (match_operand:V4SI 0 "register_operand" "")
2794 (vec_concat:V4SI
2795 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2796 (match_dup 2)))]
2797 "TARGET_SSE2"
2798 "operands[2] = CONST0_RTX (V2SImode);")
2799
;; Insn matching the RTL produced by the sse2_cvttpd2dq expander: low half
;; is `fix' of operand 1, high half must satisfy const0_operand.  The C
;; output fragment chooses the AVX or the SSE2 mnemonic based on TARGET_AVX.
2800 (define_insn "*sse2_cvttpd2dq"
2801 [(set (match_operand:V4SI 0 "register_operand" "=x")
2802 (vec_concat:V4SI
2803 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2804 (match_operand:V2SI 2 "const0_operand" "")))]
2805 "TARGET_SSE2"
2806 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2807 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2808 [(set_attr "type" "ssecvt")
2809 (set_attr "prefix" "maybe_vex")
2810 (set_attr "mode" "TI")
2811 (set_attr "amdfam10_decode" "double")
2812 (set_attr "bdver1_decode" "double")])
2813
;; AVX form of the double -> single scalar conversion.  The RTL models it as
;; a float_truncate of the V2DF operand 2 to V2SF, duplicated to V4SF and
;; merged (selector 1) so that only element 0 of the result comes from the
;; conversion; elements 1..3 come from operand 1.
2814 (define_insn "*avx_cvtsd2ss"
2815 [(set (match_operand:V4SF 0 "register_operand" "=x")
2816 (vec_merge:V4SF
2817 (vec_duplicate:V4SF
2818 (float_truncate:V2SF
2819 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
2820 (match_operand:V4SF 1 "register_operand" "x")
2821 (const_int 1)))]
2822 "TARGET_AVX"
2823 "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2824 [(set_attr "type" "ssecvt")
2825 (set_attr "prefix" "vex")
2826 (set_attr "mode" "SF")])
2827
;; SSE2 form of the double -> single scalar conversion.  Same RTL shape as
;; *avx_cvtsd2ss with operand 1 tied to the destination.  The register and
;; memory source alternatives carry separate decode attributes.
2828 (define_insn "sse2_cvtsd2ss"
2829 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2830 (vec_merge:V4SF
2831 (vec_duplicate:V4SF
2832 (float_truncate:V2SF
2833 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2834 (match_operand:V4SF 1 "register_operand" "0,0")
2835 (const_int 1)))]
2836 "TARGET_SSE2"
2837 "cvtsd2ss\t{%2, %0|%0, %2}"
2838 [(set_attr "type" "ssecvt")
2839 (set_attr "athlon_decode" "vector,double")
2840 (set_attr "amdfam10_decode" "vector,double")
2841 (set_attr "bdver1_decode" "direct,direct")
2842 (set_attr "mode" "SF")])
2843
;; AVX form of the single -> double scalar conversion.  Elements 0..1 of the
;; V4SF operand 2 are float_extended to V2DF, and vec_merge with selector 1
;; keeps only element 0 of that; element 1 of the result comes from
;; operand 1.
2844 (define_insn "*avx_cvtss2sd"
2845 [(set (match_operand:V2DF 0 "register_operand" "=x")
2846 (vec_merge:V2DF
2847 (float_extend:V2DF
2848 (vec_select:V2SF
2849 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2850 (parallel [(const_int 0) (const_int 1)])))
2851 (match_operand:V2DF 1 "register_operand" "x")
2852 (const_int 1)))]
2853 "TARGET_AVX"
2854 "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2855 [(set_attr "type" "ssecvt")
2856 (set_attr "prefix" "vex")
2857 (set_attr "mode" "DF")])
2858
;; SSE2 form of the single -> double scalar conversion.  Same RTL shape as
;; *avx_cvtss2sd with operand 1 tied to the destination; alternatives are
;; split by source kind (register / memory) for the decode attributes.
2859 (define_insn "sse2_cvtss2sd"
2860 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2861 (vec_merge:V2DF
2862 (float_extend:V2DF
2863 (vec_select:V2SF
2864 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
2865 (parallel [(const_int 0) (const_int 1)])))
2866 (match_operand:V2DF 1 "register_operand" "0,0")
2867 (const_int 1)))]
2868 "TARGET_SSE2"
2869 "cvtss2sd\t{%2, %0|%0, %2}"
2870 [(set_attr "type" "ssecvt")
2871 (set_attr "amdfam10_decode" "vector,double")
2872 (set_attr "bdver1_decode" "direct,direct")
2873 (set_attr "mode" "DF")])
2874
;; AVX: narrow all four DFmode elements of a V4DF operand (register or
;; memory) to SFmode, producing a V4SF result.
2875 (define_insn "avx_cvtpd2ps256"
2876 [(set (match_operand:V4SF 0 "register_operand" "=x")
2877 (float_truncate:V4SF
2878 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2879 "TARGET_AVX"
2880 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2881 [(set_attr "type" "ssecvt")
2882 (set_attr "prefix" "vex")
2883 (set_attr "mode" "V4SF")])
2884
;; Expander for the V2DF -> V4SF narrowing conversion.  The two converted
;; SFmode values form the low half of the destination; operand 2 is set here
;; to the V2SF zero constant and forms the high half.  Matched by
;; *sse2_cvtpd2ps.
2885 (define_expand "sse2_cvtpd2ps"
2886 [(set (match_operand:V4SF 0 "register_operand" "")
2887 (vec_concat:V4SF
2888 (float_truncate:V2SF
2889 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2890 (match_dup 2)))]
2891 "TARGET_SSE2"
2892 "operands[2] = CONST0_RTX (V2SFmode);")
2893
;; Insn matching the RTL produced by the sse2_cvtpd2ps expander: low half is
;; the float_truncate of operand 1, high half must satisfy const0_operand.
;; The C output fragment selects the AVX or SSE2 mnemonic based on
;; TARGET_AVX.
2894 (define_insn "*sse2_cvtpd2ps"
2895 [(set (match_operand:V4SF 0 "register_operand" "=x")
2896 (vec_concat:V4SF
2897 (float_truncate:V2SF
2898 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2899 (match_operand:V2SF 2 "const0_operand" "")))]
2900 "TARGET_SSE2"
2901 "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
2902 : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
2903 [(set_attr "type" "ssecvt")
2904 (set_attr "prefix_data16" "1")
2905 (set_attr "prefix" "maybe_vex")
2906 (set_attr "mode" "V4SF")
2907 (set_attr "amdfam10_decode" "double")
2908 (set_attr "bdver1_decode" "double")])
2909
;; AVX: widen all four SFmode elements of a V4SF operand (register or
;; memory) to DFmode, producing a V4DF result.
2910 (define_insn "avx_cvtps2pd256"
2911 [(set (match_operand:V4DF 0 "register_operand" "=x")
2912 (float_extend:V4DF
2913 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2914 "TARGET_AVX"
2915 "vcvtps2pd\t{%1, %0|%0, %1}"
2916 [(set_attr "type" "ssecvt")
2917 (set_attr "prefix" "vex")
2918 (set_attr "mode" "V4DF")])
2919
;; Same widening as avx_cvtps2pd256, but matching a source that is elements
;; 0..3 selected out of a V8SF operand; this is the RTL shape generated by
;; the vec_unpacks_lo_v8sf expander.  The operand is printed with the %x
;; modifier.
;; NOTE(review): %x presumably prints the 128-bit view of the operand --
;; confirm against the operand-printing code.
2920 (define_insn "*avx_cvtps2pd256_2"
2921 [(set (match_operand:V4DF 0 "register_operand" "=x")
2922 (float_extend:V4DF
2923 (vec_select:V4SF
2924 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2925 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
2926 "TARGET_AVX"
2927 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2928 [(set_attr "type" "ssecvt")
2929 (set_attr "prefix" "vex")
2930 (set_attr "mode" "V4DF")])
2931
;; Widen elements 0 and 1 of a V4SF operand (register or memory) to DFmode,
;; producing a V2DF result.  Elements 2 and 3 of the source are not used.
;; This is the RTL shape generated by vec_unpacks_lo_v4sf and by the second
;; set of vec_unpacks_hi_v4sf.
2932 (define_insn "sse2_cvtps2pd"
2933 [(set (match_operand:V2DF 0 "register_operand" "=x")
2934 (float_extend:V2DF
2935 (vec_select:V2SF
2936 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2937 (parallel [(const_int 0) (const_int 1)]))))]
2938 "TARGET_SSE2"
2939 "%vcvtps2pd\t{%1, %0|%0, %1}"
2940 [(set_attr "type" "ssecvt")
2941 (set_attr "prefix" "maybe_vex")
2942 (set_attr "mode" "V2DF")
2943 (set_attr "prefix_data16" "0")
2944 (set_attr "amdfam10_decode" "direct")
2945 (set_attr "bdver1_decode" "double")])
2946
;; Widen the high two SFmode elements of operand 1 to a V2DF result.
;; Two insns are generated using a fresh V4SF temporary (operand 2):
;;   1. tmp = select {6,7,2,3} from concat (tmp, op1), i.e.
;;      { op1[2], op1[3], tmp[2], tmp[3] } -- the movhlps shape; only the low
;;      two elements of tmp matter afterwards.
;;   2. op0 = float_extend of tmp[0..1].
2947 (define_expand "vec_unpacks_hi_v4sf"
2948 [(set (match_dup 2)
2949 (vec_select:V4SF
2950 (vec_concat:V8SF
2951 (match_dup 2)
2952 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2953 (parallel [(const_int 6)
2954 (const_int 7)
2955 (const_int 2)
2956 (const_int 3)])))
2957 (set (match_operand:V2DF 0 "register_operand" "")
2958 (float_extend:V2DF
2959 (vec_select:V2SF
2960 (match_dup 2)
2961 (parallel [(const_int 0) (const_int 1)]))))]
2962 "TARGET_SSE2"
2963 "operands[2] = gen_reg_rtx (V4SFmode);")
2964
;; Widen the high four SFmode elements of a V8SF operand to a V4DF result.
;; Elements 4..7 are first extracted into a fresh V4SF temporary
;; (operand 2), which is then float_extended as a whole.
2965 (define_expand "vec_unpacks_hi_v8sf"
2966 [(set (match_dup 2)
2967 (vec_select:V4SF
2968 (match_operand:V8SF 1 "nonimmediate_operand" "")
2969 (parallel [(const_int 4)
2970 (const_int 5)
2971 (const_int 6)
2972 (const_int 7)])))
2973 (set (match_operand:V4DF 0 "register_operand" "")
2974 (float_extend:V4DF
2975 (match_dup 2)))]
2976 "TARGET_AVX"
2977 {
2978 operands[2] = gen_reg_rtx (V4SFmode);
2979 })
2980
;; Widen the low two SFmode elements of operand 1 to a V2DF result.  No
;; preparation code is needed: the generated RTL has the same shape as the
;; sse2_cvtps2pd insn pattern.
2981 (define_expand "vec_unpacks_lo_v4sf"
2982 [(set (match_operand:V2DF 0 "register_operand" "")
2983 (float_extend:V2DF
2984 (vec_select:V2SF
2985 (match_operand:V4SF 1 "nonimmediate_operand" "")
2986 (parallel [(const_int 0) (const_int 1)]))))]
2987 "TARGET_SSE2")
2988
;; Widen the low four SFmode elements of a V8SF operand to a V4DF result.
;; The generated RTL has the same shape as the *avx_cvtps2pd256_2 insn
;; pattern.
2989 (define_expand "vec_unpacks_lo_v8sf"
2990 [(set (match_operand:V4DF 0 "register_operand" "")
2991 (float_extend:V4DF
2992 (vec_select:V4SF
2993 (match_operand:V8SF 1 "nonimmediate_operand" "")
2994 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
2995 "TARGET_AVX")
2996
;; V8HI -> V4SF conversion of the "hi" half, done in two steps through a
;; V4SI temporary: gen_vec_unpacks_hi_v8hi widens into the temporary, then
;; gen_sse2_cvtdq2ps converts it to V4SF.  Both helpers are defined outside
;; this chunk; by their names the first is the signed widening unpack.
2997 (define_expand "vec_unpacks_float_hi_v8hi"
2998 [(match_operand:V4SF 0 "register_operand" "")
2999 (match_operand:V8HI 1 "register_operand" "")]
3000 "TARGET_SSE2"
3001 {
3002 rtx tmp = gen_reg_rtx (V4SImode);
3003
3004 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
3005 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
3006 DONE;
3007 })
3008
;; V8HI -> V4SF conversion of the "lo" half: gen_vec_unpacks_lo_v8hi widens
;; into a V4SI temporary, then gen_sse2_cvtdq2ps converts the temporary to
;; V4SF.  Both helpers are defined outside this chunk.
3009 (define_expand "vec_unpacks_float_lo_v8hi"
3010 [(match_operand:V4SF 0 "register_operand" "")
3011 (match_operand:V8HI 1 "register_operand" "")]
3012 "TARGET_SSE2"
3013 {
3014 rtx tmp = gen_reg_rtx (V4SImode);
3015
3016 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
3017 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
3018 DONE;
3019 })
3020
;; Same two-step scheme as vec_unpacks_float_hi_v8hi but using
;; gen_vec_unpacku_hi_v8hi for the widening step (by its name, the unsigned
;; unpack; defined outside this chunk).  The V4SI temporary is then converted
;; with gen_sse2_cvtdq2ps.
3021 (define_expand "vec_unpacku_float_hi_v8hi"
3022 [(match_operand:V4SF 0 "register_operand" "")
3023 (match_operand:V8HI 1 "register_operand" "")]
3024 "TARGET_SSE2"
3025 {
3026 rtx tmp = gen_reg_rtx (V4SImode);
3027
3028 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
3029 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
3030 DONE;
3031 })
3032
;; Same two-step scheme as vec_unpacks_float_lo_v8hi but using
;; gen_vec_unpacku_lo_v8hi for the widening step (by its name, the unsigned
;; unpack; defined outside this chunk), followed by gen_sse2_cvtdq2ps.
3033 (define_expand "vec_unpacku_float_lo_v8hi"
3034 [(match_operand:V4SF 0 "register_operand" "")
3035 (match_operand:V8HI 1 "register_operand" "")]
3036 "TARGET_SSE2"
3037 {
3038 rtx tmp = gen_reg_rtx (V4SImode);
3039
3040 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
3041 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
3042 DONE;
3043 })
3044
;; Convert the high two SImode elements of operand 1 to a V2DF result.
;; Step 1 shuffles operand 1 into a fresh V4SI temporary (operand 2) as
;; { op1[2], op1[3], op1[2], op1[3] }; step 2 converts elements 0..1 of the
;; temporary, which has the same RTL shape as sse2_cvtdq2pd.
3045 (define_expand "vec_unpacks_float_hi_v4si"
3046 [(set (match_dup 2)
3047 (vec_select:V4SI
3048 (match_operand:V4SI 1 "nonimmediate_operand" "")
3049 (parallel [(const_int 2)
3050 (const_int 3)
3051 (const_int 2)
3052 (const_int 3)])))
3053 (set (match_operand:V2DF 0 "register_operand" "")
3054 (float:V2DF
3055 (vec_select:V2SI
3056 (match_dup 2)
3057 (parallel [(const_int 0) (const_int 1)]))))]
3058 "TARGET_SSE2"
3059 "operands[2] = gen_reg_rtx (V4SImode);")
3060
;; Convert the low two SImode elements of operand 1 to a V2DF result.  The
;; generated RTL has the same shape as the sse2_cvtdq2pd insn pattern, so no
;; preparation code is needed.
3061 (define_expand "vec_unpacks_float_lo_v4si"
3062 [(set (match_operand:V2DF 0 "register_operand" "")
3063 (float:V2DF
3064 (vec_select:V2SI
3065 (match_operand:V4SI 1 "nonimmediate_operand" "")
3066 (parallel [(const_int 0) (const_int 1)]))))]
3067 "TARGET_SSE2")
3068
;; Convert the high four SImode elements of a V8SI operand to a V4DF result.
;; Elements 4..7 are extracted into a fresh V4SI temporary (operand 2),
;; which is then converted as a whole (the avx_cvtdq2pd256 shape).
3069 (define_expand "vec_unpacks_float_hi_v8si"
3070 [(set (match_dup 2)
3071 (vec_select:V4SI
3072 (match_operand:V8SI 1 "nonimmediate_operand" "")
3073 (parallel [(const_int 4)
3074 (const_int 5)
3075 (const_int 6)
3076 (const_int 7)])))
3077 (set (match_operand:V4DF 0 "register_operand" "")
3078 (float:V4DF
3079 (match_dup 2)))]
3080 "TARGET_AVX"
3081 "operands[2] = gen_reg_rtx (V4SImode);")
3082
;; Convert the low four SImode elements of a V8SI operand to a V4DF result.
;; The generated RTL has the same shape as the *avx_cvtdq2pd256_2 insn
;; pattern.
3083 (define_expand "vec_unpacks_float_lo_v8si"
3084 [(set (match_operand:V4DF 0 "register_operand" "")
3085 (float:V4DF
3086 (vec_select:V4SI
3087 (match_operand:V8SI 1 "nonimmediate_operand" "")
3088 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
3089 "TARGET_AVX")
3090
;; Unsigned conversion of the high two SImode elements of operand 1 to V2DF,
;; built from the signed conversion plus a correction:
;;   op5 = { op1[2], op1[3], op1[2], op1[3] }      (V4SI temporary)
;;   op6 = signed float of op5[0..1]
;;   op7 = (op6 < 0) comparison result             (op3 is the zero vector)
;;   op8 = op7 & op4                                (op4 is built from 2^32)
;;   op0 = op6 + op8
;; so elements whose signed conversion came out negative get 2^32 added.
;; NOTE(review): assumes ix86_build_const_vector (V2DFmode, 1, x) yields a
;; vector with x in every element -- the helper is defined elsewhere; confirm.
3091 (define_expand "vec_unpacku_float_hi_v4si"
3092 [(set (match_dup 5)
3093 (vec_select:V4SI
3094 (match_operand:V4SI 1 "nonimmediate_operand" "")
3095 (parallel [(const_int 2)
3096 (const_int 3)
3097 (const_int 2)
3098 (const_int 3)])))
3099 (set (match_dup 6)
3100 (float:V2DF
3101 (vec_select:V2SI
3102 (match_dup 5)
3103 (parallel [(const_int 0) (const_int 1)]))))
3104 (set (match_dup 7)
3105 (lt:V2DF (match_dup 6) (match_dup 3)))
3106 (set (match_dup 8)
3107 (and:V2DF (match_dup 7) (match_dup 4)))
3108 (set (match_operand:V2DF 0 "register_operand" "")
3109 (plus:V2DF (match_dup 6) (match_dup 8)))]
3110 "TARGET_SSE2"
3111 {
3112 REAL_VALUE_TYPE TWO32r;
3113 rtx x;
3114 int i;
3115
3116 real_ldexp (&TWO32r, &dconst1, 32);
3117 x = const_double_from_real_value (TWO32r, DFmode);
3118
3119 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3120 operands[4] = force_reg (V2DFmode,
3121 ix86_build_const_vector (V2DFmode, 1, x));
3122
3123 operands[5] = gen_reg_rtx (V4SImode);
3124
3125 for (i = 6; i < 9; i++)
3126 operands[i] = gen_reg_rtx (V2DFmode);
3127 })
3128
;; Unsigned conversion of the low two SImode elements of operand 1 to V2DF,
;; built from the signed conversion plus a correction:
;;   op5 = signed float of op1[0..1]
;;   op6 = (op5 < 0) comparison result             (op3 is the zero vector)
;;   op7 = op6 & op4                                (op4 is built from 2^32)
;;   op0 = op5 + op7
;; Operands 5..7 are fresh V2DF temporaries allocated by the loop below.
;; NOTE(review): assumes ix86_build_const_vector (V2DFmode, 1, x) yields a
;; vector with x in every element -- the helper is defined elsewhere; confirm.
3129 (define_expand "vec_unpacku_float_lo_v4si"
3130 [(set (match_dup 5)
3131 (float:V2DF
3132 (vec_select:V2SI
3133 (match_operand:V4SI 1 "nonimmediate_operand" "")
3134 (parallel [(const_int 0) (const_int 1)]))))
3135 (set (match_dup 6)
3136 (lt:V2DF (match_dup 5) (match_dup 3)))
3137 (set (match_dup 7)
3138 (and:V2DF (match_dup 6) (match_dup 4)))
3139 (set (match_operand:V2DF 0 "register_operand" "")
3140 (plus:V2DF (match_dup 5) (match_dup 7)))]
3141 "TARGET_SSE2"
3142 {
3143 REAL_VALUE_TYPE TWO32r;
3144 rtx x;
3145 int i;
3146
3147 real_ldexp (&TWO32r, &dconst1, 32);
3148 x = const_double_from_real_value (TWO32r, DFmode);
3149
3150 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3151 operands[4] = force_reg (V2DFmode,
3152 ix86_build_const_vector (V2DFmode, 1, x));
3153
3154 for (i = 5; i < 8; i++)
3155 operands[i] = gen_reg_rtx (V2DFmode);
3156 })
3157
;; Narrow two V4DF operands to SFmode and pack them into one V8SF result.
;; Each input is float_truncated into its own fresh V4SF temporary
;; (operands 3 and 4); the result is their concatenation, operand 1's
;; elements first.
3158 (define_expand "vec_pack_trunc_v4df"
3159 [(set (match_dup 3)
3160 (float_truncate:V4SF
3161 (match_operand:V4DF 1 "nonimmediate_operand" "")))
3162 (set (match_dup 4)
3163 (float_truncate:V4SF
3164 (match_operand:V4DF 2 "nonimmediate_operand" "")))
3165 (set (match_operand:V8SF 0 "register_operand" "")
3166 (vec_concat:V8SF
3167 (match_dup 3)
3168 (match_dup 4)))]
3169 "TARGET_AVX"
3170 {
3171 operands[3] = gen_reg_rtx (V4SFmode);
3172 operands[4] = gen_reg_rtx (V4SFmode);
3173 })
3174
;; Narrow two V2DF operands to SFmode and pack them into one V4SF result.
;; Each input goes through gen_sse2_cvtpd2ps, which leaves the two converted
;; values in the low half of a V4SF temporary (high half zero); gen_sse_movlhps
;; then combines the low halves: { r1[0], r1[1], r2[0], r2[1] }.
3175 (define_expand "vec_pack_trunc_v2df"
3176 [(match_operand:V4SF 0 "register_operand" "")
3177 (match_operand:V2DF 1 "nonimmediate_operand" "")
3178 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3179 "TARGET_SSE2"
3180 {
3181 rtx r1, r2;
3182
3183 r1 = gen_reg_rtx (V4SFmode);
3184 r2 = gen_reg_rtx (V4SFmode);
3185
3186 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3187 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3188 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
3189 DONE;
3190 })
3191
;; Convert two V2DF operands to SImode with the `fix'-based
;; gen_sse2_cvttpd2dq and pack the four results into one V4SI destination.
;; Each conversion leaves its two results in the low half of a V4SI
;; temporary; the temporaries and the destination are then viewed as V2DI
;; and combined with gen_vec_interleave_lowv2di.
;; NOTE(review): gen_vec_interleave_lowv2di is defined outside this chunk;
;; presumably it yields { r1.lo64, r2.lo64 } -- confirm.
3192 (define_expand "vec_pack_sfix_trunc_v2df"
3193 [(match_operand:V4SI 0 "register_operand" "")
3194 (match_operand:V2DF 1 "nonimmediate_operand" "")
3195 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3196 "TARGET_SSE2"
3197 {
3198 rtx r1, r2;
3199
3200 r1 = gen_reg_rtx (V4SImode);
3201 r2 = gen_reg_rtx (V4SImode);
3202
3203 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3204 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3205 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3206 gen_lowpart (V2DImode, r1),
3207 gen_lowpart (V2DImode, r2)));
3208 DONE;
3209 })
3210
;; Same packing scheme as vec_pack_sfix_trunc_v2df, but each V2DF operand is
;; converted with gen_sse2_cvtpd2dq (the UNSPEC_FIX_NOTRUNC conversion)
;; instead of the `fix'-based one.  The two V4SI temporaries are combined
;; through their V2DI views with gen_vec_interleave_lowv2di.
;; NOTE(review): gen_vec_interleave_lowv2di is defined outside this chunk;
;; presumably it yields { r1.lo64, r2.lo64 } -- confirm.
3211 (define_expand "vec_pack_sfix_v2df"
3212 [(match_operand:V4SI 0 "register_operand" "")
3213 (match_operand:V2DF 1 "nonimmediate_operand" "")
3214 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3215 "TARGET_SSE2"
3216 {
3217 rtx r1, r2;
3218
3219 r1 = gen_reg_rtx (V4SImode);
3220 r2 = gen_reg_rtx (V4SImode);
3221
3222 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3223 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3224 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3225 gen_lowpart (V2DImode, r1),
3226 gen_lowpart (V2DImode, r2)));
3227 DONE;
3228 })
3229
3230 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3231 ;;
3232 ;; Parallel single-precision floating point element swizzling
3233 ;;
3234 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3235
;; Expander wrapper around the sse_movhlps insn.  The RTL template documents
;; the operation ({ op2[2], op2[3], op1[2], op1[3] }), but the preparation
;; code emits everything itself and ends with DONE.  It first lets
;; ix86_fixup_binary_operands adjust the operands and return the destination
;; to use, emits gen_sse_movhlps into that destination, and copies to the
;; requested operand 0 if the helper returned a different rtx.
;; NOTE(review): ix86_fixup_binary_operands is defined elsewhere; presumably
;; it legitimizes memory operands -- confirm.
3236 (define_expand "sse_movhlps_exp"
3237 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3238 (vec_select:V4SF
3239 (vec_concat:V8SF
3240 (match_operand:V4SF 1 "nonimmediate_operand" "")
3241 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3242 (parallel [(const_int 6)
3243 (const_int 7)
3244 (const_int 2)
3245 (const_int 3)])))]
3246 "TARGET_SSE"
3247 {
3248 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3249
3250 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3251
3252 /* Fix up the destination if needed. */
3253 if (dst != operands[0])
3254 emit_move_insn (operands[0], dst);
3255
3256 DONE;
3257 })
3258
;; AVX insn for the selection { op2[2], op2[3], op1[2], op1[3] }.
;; Three alternatives, one output template each:
;;   0: both inputs in registers           -> vmovhlps
;;   1: operand 2 in offsettable memory    -> vmovlps from %H2
;;   2: destination is memory (operand 1 tied to it), operand 2 in a
;;      register                           -> vmovhps store
;; The condition rejects operands 1 and 2 both being memory.
;; NOTE(review): %H2 presumably addresses the high half of the memory
;; operand -- confirm against the operand-printing code.
3259 (define_insn "*avx_movhlps"
3260 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3261 (vec_select:V4SF
3262 (vec_concat:V8SF
3263 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3264 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3265 (parallel [(const_int 6)
3266 (const_int 7)
3267 (const_int 2)
3268 (const_int 3)])))]
3269 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3270 "@
3271 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3272 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3273 vmovhps\t{%2, %0|%0, %2}"
3274 [(set_attr "type" "ssemov")
3275 (set_attr "prefix" "vex")
3276 (set_attr "mode" "V4SF,V2SF,V2SF")])
3277
;; SSE insn for the selection { op2[2], op2[3], op1[2], op1[3] }.  Operand 1
;; is tied to the destination in every alternative.
;;   0: operand 2 in a register            -> movhlps
;;   1: operand 2 in offsettable memory    -> movlps from %H2
;;   2: destination in memory, operand 2 in a register -> movhps store
;; The condition rejects operands 1 and 2 both being memory.
3278 (define_insn "sse_movhlps"
3279 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3280 (vec_select:V4SF
3281 (vec_concat:V8SF
3282 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3283 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3284 (parallel [(const_int 6)
3285 (const_int 7)
3286 (const_int 2)
3287 (const_int 3)])))]
3288 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3289 "@
3290 movhlps\t{%2, %0|%0, %2}
3291 movlps\t{%H2, %0|%0, %H2}
3292 movhps\t{%2, %0|%0, %2}"
3293 [(set_attr "type" "ssemov")
3294 (set_attr "mode" "V4SF,V2SF,V2SF")])
3295
;; Expander wrapper around the sse_movlhps insn.  The RTL template documents
;; the operation ({ op1[0], op1[1], op2[0], op2[1] }); the preparation code
;; emits everything itself and ends with DONE.  ix86_fixup_binary_operands
;; adjusts the operands and returns the destination to use; the result is
;; copied to the requested operand 0 if that differs.
;; NOTE(review): ix86_fixup_binary_operands is defined elsewhere; presumably
;; it legitimizes memory operands -- confirm.
3296 (define_expand "sse_movlhps_exp"
3297 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3298 (vec_select:V4SF
3299 (vec_concat:V8SF
3300 (match_operand:V4SF 1 "nonimmediate_operand" "")
3301 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3302 (parallel [(const_int 0)
3303 (const_int 1)
3304 (const_int 4)
3305 (const_int 5)])))]
3306 "TARGET_SSE"
3307 {
3308 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3309
3310 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3311
3312 /* Fix up the destination if needed. */
3313 if (dst != operands[0])
3314 emit_move_insn (operands[0], dst);
3315
3316 DONE;
3317 })
3318
;; AVX insn for the selection { op1[0], op1[1], op2[0], op2[1] }.
;;   0: both inputs in registers           -> vmovlhps
;;   1: operand 2 in memory                -> vmovhps load
;;   2: destination is offsettable memory (operand 1 tied to it), operand 2
;;      in a register                      -> vmovlps store to %H0
;; Operand validity is additionally checked by ix86_binary_operator_ok.
3319 (define_insn "*avx_movlhps"
3320 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3321 (vec_select:V4SF
3322 (vec_concat:V8SF
3323 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3324 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3325 (parallel [(const_int 0)
3326 (const_int 1)
3327 (const_int 4)
3328 (const_int 5)])))]
3329 "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3330 "@
3331 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3332 vmovhps\t{%2, %1, %0|%0, %1, %2}
3333 vmovlps\t{%2, %H0|%H0, %2}"
3334 [(set_attr "type" "ssemov")
3335 (set_attr "prefix" "vex")
3336 (set_attr "mode" "V4SF,V2SF,V2SF")])
3337
;; SSE insn for the selection { op1[0], op1[1], op2[0], op2[1] }.  Operand 1
;; is tied to the destination in every alternative.
;;   0: operand 2 in a register            -> movlhps
;;   1: operand 2 in memory                -> movhps load
;;   2: destination in offsettable memory, operand 2 in a register
;;                                         -> movlps store to %H0
;; Operand validity is additionally checked by ix86_binary_operator_ok.
3338 (define_insn "sse_movlhps"
3339 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3340 (vec_select:V4SF
3341 (vec_concat:V8SF
3342 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3343 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3344 (parallel [(const_int 0)
3345 (const_int 1)
3346 (const_int 4)
3347 (const_int 5)])))]
3348 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3349 "@
3350 movlhps\t{%2, %0|%0, %2}
3351 movhps\t{%2, %0|%0, %2}
3352 movlps\t{%2, %H0|%H0, %2}"
3353 [(set_attr "type" "ssemov")
3354 (set_attr "mode" "V4SF,V2SF,V2SF")])
3355
3356 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; With a = operand 1 and b = operand 2, the selector picks
;; { a2, b2, a3, b3, a6, b6, a7, b7 }: the upper two elements of each
;; four-element half of a and b, interleaved.
3357 (define_insn "avx_unpckhps256"
3358 [(set (match_operand:V8SF 0 "register_operand" "=x")
3359 (vec_select:V8SF
3360 (vec_concat:V16SF
3361 (match_operand:V8SF 1 "register_operand" "x")
3362 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3363 (parallel [(const_int 2) (const_int 10)
3364 (const_int 3) (const_int 11)
3365 (const_int 6) (const_int 14)
3366 (const_int 7) (const_int 15)])))]
3367 "TARGET_AVX"
3368 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3369 [(set_attr "type" "sselog")
3370 (set_attr "prefix" "vex")
3371 (set_attr "mode" "V8SF")])
3372
;; AVX three-operand form of the V4SF high interleave: with a = operand 1
;; and b = operand 2 the result is { a2, b2, a3, b3 }.  Same RTL as
;; vec_interleave_highv4sf, without tying operand 1 to the destination.
3373 (define_insn "*avx_interleave_highv4sf"
3374 [(set (match_operand:V4SF 0 "register_operand" "=x")
3375 (vec_select:V4SF
3376 (vec_concat:V8SF
3377 (match_operand:V4SF 1 "register_operand" "x")
3378 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3379 (parallel [(const_int 2) (const_int 6)
3380 (const_int 3) (const_int 7)])))]
3381 "TARGET_AVX"
3382 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3383 [(set_attr "type" "sselog")
3384 (set_attr "prefix" "vex")
3385 (set_attr "mode" "V4SF")])
3386
;; Full-width high interleave of two V8SF operands a (operand 1) and
;; b (operand 2): the result is { a4, b4, a5, b5, a6, b6, a7, b7 }.
;; Built from three insns using two fresh V8SF temporaries:
;;   t3 = { a0, b0, a1, b1, a4, b4, a5, b5 }   (avx_unpcklps256 shape)
;;   t4 = { a2, b2, a3, b3, a6, b6, a7, b7 }   (avx_unpckhps256 shape)
;;   op0 = elements 4..7 of t3 followed by elements 4..7 of t4.
3387 (define_expand "vec_interleave_highv8sf"
3388 [(set (match_dup 3)
3389 (vec_select:V8SF
3390 (vec_concat:V16SF
3391 (match_operand:V8SF 1 "register_operand" "x")
3392 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3393 (parallel [(const_int 0) (const_int 8)
3394 (const_int 1) (const_int 9)
3395 (const_int 4) (const_int 12)
3396 (const_int 5) (const_int 13)])))
3397 (set (match_dup 4)
3398 (vec_select:V8SF
3399 (vec_concat:V16SF
3400 (match_dup 1)
3401 (match_dup 2))
3402 (parallel [(const_int 2) (const_int 10)
3403 (const_int 3) (const_int 11)
3404 (const_int 6) (const_int 14)
3405 (const_int 7) (const_int 15)])))
3406 (set (match_operand:V8SF 0 "register_operand" "")
3407 (vec_select:V8SF
3408 (vec_concat:V16SF
3409 (match_dup 3)
3410 (match_dup 4))
3411 (parallel [(const_int 4) (const_int 5)
3412 (const_int 6) (const_int 7)
3413 (const_int 12) (const_int 13)
3414 (const_int 14) (const_int 15)])))]
3415 "TARGET_AVX"
3416 {
3417 operands[3] = gen_reg_rtx (V8SFmode);
3418 operands[4] = gen_reg_rtx (V8SFmode);
3419 })
3420
;; SSE high interleave of two V4SF operands: with a = operand 1 (tied to the
;; destination) and b = operand 2 (register or memory), the result is
;; { a2, b2, a3, b3 }.
3421 (define_insn "vec_interleave_highv4sf"
3422 [(set (match_operand:V4SF 0 "register_operand" "=x")
3423 (vec_select:V4SF
3424 (vec_concat:V8SF
3425 (match_operand:V4SF 1 "register_operand" "0")
3426 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3427 (parallel [(const_int 2) (const_int 6)
3428 (const_int 3) (const_int 7)])))]
3429 "TARGET_SSE"
3430 "unpckhps\t{%2, %0|%0, %2}"
3431 [(set_attr "type" "sselog")
3432 (set_attr "mode" "V4SF")])
3433
3434 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; With a = operand 1 and b = operand 2, the selector picks
;; { a0, b0, a1, b1, a4, b4, a5, b5 }: the lower two elements of each
;; four-element half of a and b, interleaved.
3435 (define_insn "avx_unpcklps256"
3436 [(set (match_operand:V8SF 0 "register_operand" "=x")
3437 (vec_select:V8SF
3438 (vec_concat:V16SF
3439 (match_operand:V8SF 1 "register_operand" "x")
3440 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3441 (parallel [(const_int 0) (const_int 8)
3442 (const_int 1) (const_int 9)
3443 (const_int 4) (const_int 12)
3444 (const_int 5) (const_int 13)])))]
3445 "TARGET_AVX"
3446 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3447 [(set_attr "type" "sselog")
3448 (set_attr "prefix" "vex")
3449 (set_attr "mode" "V8SF")])
3450
;; AVX three-operand form of the V4SF low interleave: with a = operand 1 and
;; b = operand 2 the result is { a0, b0, a1, b1 }.  Same RTL as
;; vec_interleave_lowv4sf, without tying operand 1 to the destination.
3451 (define_insn "*avx_interleave_lowv4sf"
3452 [(set (match_operand:V4SF 0 "register_operand" "=x")
3453 (vec_select:V4SF
3454 (vec_concat:V8SF
3455 (match_operand:V4SF 1 "register_operand" "x")
3456 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3457 (parallel [(const_int 0) (const_int 4)
3458 (const_int 1) (const_int 5)])))]
3459 "TARGET_AVX"
3460 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3461 [(set_attr "type" "sselog")
3462 (set_attr "prefix" "vex")
3463 (set_attr "mode" "V4SF")])
3464
;; Full-width low interleave of two V8SF operands a (operand 1) and
;; b (operand 2): the result is { a0, b0, a1, b1, a2, b2, a3, b3 }.
;; Built from three insns using two fresh V8SF temporaries:
;;   t3 = { a0, b0, a1, b1, a4, b4, a5, b5 }   (avx_unpcklps256 shape)
;;   t4 = { a2, b2, a3, b3, a6, b6, a7, b7 }   (avx_unpckhps256 shape)
;;   op0 = elements 0..3 of t3 followed by elements 0..3 of t4.
3465 (define_expand "vec_interleave_lowv8sf"
3466 [(set (match_dup 3)
3467 (vec_select:V8SF
3468 (vec_concat:V16SF
3469 (match_operand:V8SF 1 "register_operand" "x")
3470 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3471 (parallel [(const_int 0) (const_int 8)
3472 (const_int 1) (const_int 9)
3473 (const_int 4) (const_int 12)
3474 (const_int 5) (const_int 13)])))
3475 (set (match_dup 4)
3476 (vec_select:V8SF
3477 (vec_concat:V16SF
3478 (match_dup 1)
3479 (match_dup 2))
3480 (parallel [(const_int 2) (const_int 10)
3481 (const_int 3) (const_int 11)
3482 (const_int 6) (const_int 14)
3483 (const_int 7) (const_int 15)])))
3484 (set (match_operand:V8SF 0 "register_operand" "")
3485 (vec_select:V8SF
3486 (vec_concat:V16SF
3487 (match_dup 3)
3488 (match_dup 4))
3489 (parallel [(const_int 0) (const_int 1)
3490 (const_int 2) (const_int 3)
3491 (const_int 8) (const_int 9)
3492 (const_int 10) (const_int 11)])))]
3493 "TARGET_AVX"
3494 {
3495 operands[3] = gen_reg_rtx (V8SFmode);
3496 operands[4] = gen_reg_rtx (V8SFmode);
3497 })
3498
;; SSE low interleave of two V4SF operands: with a = operand 1 (tied to the
;; destination) and b = operand 2 (register or memory), the result is
;; { a0, b0, a1, b1 }.
3499 (define_insn "vec_interleave_lowv4sf"
3500 [(set (match_operand:V4SF 0 "register_operand" "=x")
3501 (vec_select:V4SF
3502 (vec_concat:V8SF
3503 (match_operand:V4SF 1 "register_operand" "0")
3504 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3505 (parallel [(const_int 0) (const_int 4)
3506 (const_int 1) (const_int 5)])))]
3507 "TARGET_SSE"
3508 "unpcklps\t{%2, %0|%0, %2}"
3509 [(set_attr "type" "sselog")
3510 (set_attr "mode" "V4SF")])
3511
3512 ;; These are modeled with the same vec_concat as the others so that we
3513 ;; capture users of shufps that can use the new instructions
;; Duplicate the odd-indexed elements of a V8SF operand x:
;; { x1, x1, x3, x3, x5, x5, x7, x7 }.  The operand is concatenated with
;; itself (match_dup 1), but every selector index is below 8, so only the
;; first copy is actually read.
3514 (define_insn "avx_movshdup256"
3515 [(set (match_operand:V8SF 0 "register_operand" "=x")
3516 (vec_select:V8SF
3517 (vec_concat:V16SF
3518 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3519 (match_dup 1))
3520 (parallel [(const_int 1) (const_int 1)
3521 (const_int 3) (const_int 3)
3522 (const_int 5) (const_int 5)
3523 (const_int 7) (const_int 7)])))]
3524 "TARGET_AVX"
3525 "vmovshdup\t{%1, %0|%0, %1}"
3526 [(set_attr "type" "sse")
3527 (set_attr "prefix" "vex")
3528 (set_attr "mode" "V8SF")])
3529
;; Duplicate the odd-indexed elements of a V4SF operand x:
;; { x1, x1, x3, x3 }.  The operand is concatenated with itself, and the
;; upper pair is selected from the second copy: index 7 of the V8SF concat
;; is element 3 of x.
3530 (define_insn "sse3_movshdup"
3531 [(set (match_operand:V4SF 0 "register_operand" "=x")
3532 (vec_select:V4SF
3533 (vec_concat:V8SF
3534 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3535 (match_dup 1))
3536 (parallel [(const_int 1)
3537 (const_int 1)
3538 (const_int 7)
3539 (const_int 7)])))]
3540 "TARGET_SSE3"
3541 "%vmovshdup\t{%1, %0|%0, %1}"
3542 [(set_attr "type" "sse")
3543 (set_attr "prefix_rep" "1")
3544 (set_attr "prefix" "maybe_vex")
3545 (set_attr "mode" "V4SF")])
3546
;; Duplicate the even-indexed elements of a V8SF operand x:
;; { x0, x0, x2, x2, x4, x4, x6, x6 }.  The operand is concatenated with
;; itself (match_dup 1), but every selector index is below 8, so only the
;; first copy is actually read.
3547 (define_insn "avx_movsldup256"
3548 [(set (match_operand:V8SF 0 "register_operand" "=x")
3549 (vec_select:V8SF
3550 (vec_concat:V16SF
3551 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3552 (match_dup 1))
3553 (parallel [(const_int 0) (const_int 0)
3554 (const_int 2) (const_int 2)
3555 (const_int 4) (const_int 4)
3556 (const_int 6) (const_int 6)])))]
3557 "TARGET_AVX"
3558 "vmovsldup\t{%1, %0|%0, %1}"
3559 [(set_attr "type" "sse")
3560 (set_attr "prefix" "vex")
3561 (set_attr "mode" "V8SF")])
3562
;; Duplicate the even-indexed elements of a V4SF operand x:
;; { x0, x0, x2, x2 }.  The operand is concatenated with itself, and the
;; upper pair is selected from the second copy: index 6 of the V8SF concat
;; is element 2 of x.
3563 (define_insn "sse3_movsldup"
3564 [(set (match_operand:V4SF 0 "register_operand" "=x")
3565 (vec_select:V4SF
3566 (vec_concat:V8SF
3567 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3568 (match_dup 1))
3569 (parallel [(const_int 0)
3570 (const_int 0)
3571 (const_int 6)
3572 (const_int 6)])))]
3573 "TARGET_SSE3"
3574 "%vmovsldup\t{%1, %0|%0, %1}"
3575 [(set_attr "type" "sse")
3576 (set_attr "prefix_rep" "1")
3577 (set_attr "prefix" "maybe_vex")
3578 (set_attr "mode" "V4SF")])
3579
;; Expand a 256-bit shufps given as an 8-bit immediate (operand 3) into
;; the explicit element-selection form matched by avx_shufps256_1.
(define_expand "avx_shufps256"
  [(match_operand:V8SF 0 "register_operand" "")
   (match_operand:V8SF 1 "register_operand" "")
   (match_operand:V8SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_AVX"
{
  /* Split the immediate into its four 2-bit selector fields.  */
  const int imm = INTVAL (operands[3]);
  const int f0 = imm & 3;
  const int f1 = (imm >> 2) & 3;
  const int f2 = (imm >> 4) & 3;
  const int f3 = (imm >> 6) & 3;

  /* Fields 0/1 index operand 1 and fields 2/3 index operand 2 (which
     starts at 8 in the V16SF concatenation).  The second group of four
     selectors repeats the same choice 4 elements higher.  */
  emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
                                  GEN_INT (f0),
                                  GEN_INT (f1),
                                  GEN_INT (f2 + 8),
                                  GEN_INT (f3 + 8),
                                  GEN_INT (f0 + 4),
                                  GEN_INT (f1 + 4),
                                  GEN_INT (f2 + 12),
                                  GEN_INT (f3 + 12)));
  DONE;
})
3599
3600 ;; Each 2-bit field of the mask selects 2 elements, one per 128-bit lane.
;; 256-bit vshufps in explicit vec_select form.  Operands 3..6 pick the
;; four elements of the lower half of the result; operands 7..10 must pick
;; the corresponding elements 4 positions higher (see the insn condition),
;; so only operands 3..6 are encoded into the immediate.
(define_insn "avx_shufps256_1"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_8_to_11_operand" "")
                     (match_operand 6 "const_8_to_11_operand" "")
                     (match_operand 7 "const_4_to_7_operand" "")
                     (match_operand 8 "const_4_to_7_operand" "")
                     (match_operand 9 "const_12_to_15_operand" "")
                     (match_operand 10 "const_12_to_15_operand" "")])))]
  "TARGET_AVX
   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
   && INTVAL (operands[6]) + 4 == INTVAL (operands[10])"
{
  /* Rebuild the 8-bit immediate: two bits per selector, with the
     operand-2 selectors rebased from 8..11 to 0..3.  */
  int imm8 = (INTVAL (operands[3])
              | (INTVAL (operands[4]) << 2)
              | ((INTVAL (operands[5]) - 8) << 4)
              | ((INTVAL (operands[6]) - 8) << 6));

  operands[3] = GEN_INT (imm8);
  return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
3634
;; Expand a 128-bit shufps given as an 8-bit immediate (operand 3) into
;; the explicit element-selection form of sse_shufps_v4sf.
(define_expand "sse_shufps"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand:V4SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE"
{
  /* Four 2-bit selector fields; the upper two index operand 2, which
     starts at element 4 of the concatenation.  */
  const int imm = INTVAL (operands[3]);
  rtx sel0 = GEN_INT (imm & 3);
  rtx sel1 = GEN_INT ((imm >> 2) & 3);
  rtx sel2 = GEN_INT (((imm >> 4) & 3) + 4);
  rtx sel3 = GEN_INT (((imm >> 6) & 3) + 4);

  emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
                                  sel0, sel1, sel2, sel3));
  DONE;
})
3650
;; Three-operand (VEX) shufps in explicit vec_select form: operands 3 and 4
;; select from operand 1, operands 5 and 6 select from operand 2.
(define_insn "*avx_shufps_<mode>"
  [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
        (vec_select:SSEMODE4S
          (vec_concat:<ssedoublesizemode>
            (match_operand:SSEMODE4S 1 "register_operand" "x")
            (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")
                     (match_operand 6 "const_4_to_7_operand" "")])))]
  "TARGET_AVX"
{
  /* Pack the four selectors into the 8-bit immediate, two bits each;
     the operand-2 selectors are rebased from 4..7 to 0..3.  */
  int imm8 = (INTVAL (operands[3])
              | (INTVAL (operands[4]) << 2)
              | ((INTVAL (operands[5]) - 4) << 4)
              | ((INTVAL (operands[6]) - 4) << 6));

  operands[3] = GEN_INT (imm8);
  return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])
3676
;; Two-operand (legacy SSE) shufps in explicit vec_select form; operand 1
;; is tied to the destination.  Operands 3 and 4 select from operand 1,
;; operands 5 and 6 select from operand 2.
(define_insn "sse_shufps_<mode>"
  [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
        (vec_select:SSEMODE4S
          (vec_concat:<ssedoublesizemode>
            (match_operand:SSEMODE4S 1 "register_operand" "0")
            (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")
                     (match_operand 6 "const_4_to_7_operand" "")])))]
  "TARGET_SSE"
{
  /* Pack the four selectors into the 8-bit immediate, two bits each;
     the operand-2 selectors are rebased from 4..7 to 0..3.  */
  int imm8 = (INTVAL (operands[3])
              | (INTVAL (operands[4]) << 2)
              | ((INTVAL (operands[5]) - 4) << 4)
              | ((INTVAL (operands[6]) - 4) << 6));

  operands[3] = GEN_INT (imm8);
  return "shufps\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "V4SF")])
3701
;; Extract elements 2 and 3 (the upper half) of V4SF operand 1 as a V2SF.
;;   alt 0: register -> memory (movhps)
;;   alt 1: register -> register (movhlps)
;;   alt 2: offsettable memory -> register (movlps on %H1; presumably the
;;          upper 8 bytes of the memory operand -- confirm against the
;;          operand-printing code)
3702 (define_insn "sse_storehps"
3703 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3704 (vec_select:V2SF
3705 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3706 (parallel [(const_int 2) (const_int 3)])))]
3707 "TARGET_SSE"
3708 "@
3709 %vmovhps\t{%1, %0|%0, %1}
3710 %vmovhlps\t{%1, %d0|%d0, %1}
3711 %vmovlps\t{%H1, %d0|%d0, %H1}"
3712 [(set_attr "type" "ssemov")
3713 (set_attr "prefix" "maybe_vex")
3714 (set_attr "mode" "V2SF,V4SF,V2SF")])
3715
;; Expander for replacing the upper half of a V4SF: the result is elements
;; 0 and 1 of operand 1 followed by V2SF operand 2.  The operands are first
;; passed through ix86_fixup_binary_operands; if that returns a destination
;; other than operand 0, the value is copied to operand 0 afterwards.
(define_expand "sse_loadhps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (vec_concat:V4SF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:V2SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_loadhps (target, operands[1], operands[2]));

  /* The fixup may have substituted another destination; copy back.  */
  if (target != operands[0])
    emit_move_insn (operands[0], target);

  DONE;
})
3735
;; AVX form of sse_loadhps: result = elements 0,1 of operand 1 followed by
;; V2SF operand 2.
;;   alt 0: operand 2 in memory (vmovhps)
;;   alt 1: operand 2 in a register (vmovlhps)
;;   alt 2: destination is offsettable memory tied to operand 1; only the
;;          upper half is stored (vmovlps to %H0)
3736 (define_insn "*avx_loadhps"
3737 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3738 (vec_concat:V4SF
3739 (vec_select:V2SF
3740 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3741 (parallel [(const_int 0) (const_int 1)]))
3742 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3743 "TARGET_AVX"
3744 "@
3745 vmovhps\t{%2, %1, %0|%0, %1, %2}
3746 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3747 vmovlps\t{%2, %H0|%H0, %2}"
3748 [(set_attr "type" "ssemov")
3749 (set_attr "prefix" "vex")
3750 (set_attr "mode" "V2SF,V4SF,V2SF")])
3751
;; Replace the upper half of a V4SF: result = elements 0,1 of operand 1
;; followed by V2SF operand 2.  Operand 1 is tied to the destination in
;; every alternative.
;;   alt 0: operand 2 in memory (movhps)
;;   alt 1: operand 2 in a register (movlhps)
;;   alt 2: destination is offsettable memory; only the upper half is
;;          stored (movlps to %H0)
3752 (define_insn "sse_loadhps"
3753 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3754 (vec_concat:V4SF
3755 (vec_select:V2SF
3756 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3757 (parallel [(const_int 0) (const_int 1)]))
3758 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3759 "TARGET_SSE"
3760 "@
3761 movhps\t{%2, %0|%0, %2}
3762 movlhps\t{%2, %0|%0, %2}
3763 movlps\t{%2, %H0|%H0, %2}"
3764 [(set_attr "type" "ssemov")
3765 (set_attr "mode" "V2SF,V4SF,V2SF")])
3766
;; AVX form of sse_storelps: extract elements 0 and 1 (the lower half) of
;; V4SF operand 1 as a V2SF.
;;   alt 0: register -> memory (vmovlps)
;;   alt 1: register -> register; the whole register is copied (vmovaps)
;;   alt 2: memory -> register (three-operand vmovlps, with the destination
;;          also used as the register source)
;; The vmovaps alternative is a packed-single move, so its "mode" attribute
;; is V4SF, matching sse_storelps.
(define_insn "*avx_storelps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_AVX"
  "@
   vmovlps\t{%1, %0|%0, %1}
   vmovaps\t{%1, %0|%0, %1}
   vmovlps\t{%1, %0, %0|%0, %0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
3780
;; Extract elements 0 and 1 (the lower half) of V4SF operand 1 as a V2SF.
;;   alt 0: register -> memory (movlps)
;;   alt 1: register -> register; the whole register is copied (movaps)
;;   alt 2: memory -> register (movlps)
3781 (define_insn "sse_storelps"
3782 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3783 (vec_select:V2SF
3784 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3785 (parallel [(const_int 0) (const_int 1)])))]
3786 "TARGET_SSE"
3787 "@
3788 movlps\t{%1, %0|%0, %1}
3789 movaps\t{%1, %0|%0, %1}
3790 movlps\t{%1, %0|%0, %1}"
3791 [(set_attr "type" "ssemov")
3792 (set_attr "mode" "V2SF,V4SF,V2SF")])
3793
;; Expander for replacing the lower half of a V4SF: the result is V2SF
;; operand 2 followed by elements 2 and 3 of operand 1.  The operands are
;; first passed through ix86_fixup_binary_operands; if that returns a
;; destination other than operand 0, the value is copied to operand 0
;; afterwards.
(define_expand "sse_loadlps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand" "")
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_SSE"
{
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_loadlps (target, operands[1], operands[2]));

  /* The fixup may have substituted another destination; copy back.  */
  if (target != operands[0])
    emit_move_insn (operands[0], target);

  DONE;
})
3813
;; AVX form of sse_loadlps: result = V2SF operand 2 followed by elements
;; 2 and 3 of operand 1.
;;   alt 0: both inputs in registers.  Selector 0xe4 (fields 0,1,2,3) takes
;;          elements 0,1 from the first source (%2) and elements 2,3 from
;;          the second source (%1).  The three-register form requires the
;;          VEX mnemonic vshufps; plain "shufps" takes only two operands
;;          plus the immediate.
;;   alt 1: low half loaded from memory (three-operand vmovlps)
;;   alt 2: destination is memory tied to operand 1; only the low half is
;;          stored (vmovlps)
(define_insn "*avx_loadlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "@
   vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
   vmovlps\t{%2, %1, %0|%0, %1, %2}
   vmovlps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "length_immediate" "1,*,*")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF,V2SF,V2SF")])
3830
;; Replace the lower half of a V4SF: result = V2SF operand 2 followed by
;; elements 2 and 3 of operand 1.
;;   alt 0: operand 2 is tied to the destination and operand 1 is a
;;          register; shufps with selector 0xe4 keeps elements 0,1 of the
;;          destination and takes elements 2,3 from operand 1
;;   alt 1: operand 1 tied to the destination, low half loaded from memory
;;   alt 2: destination is memory tied to operand 1; only the low half is
;;          stored
3831 (define_insn "sse_loadlps"
3832 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3833 (vec_concat:V4SF
3834 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3835 (vec_select:V2SF
3836 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3837 (parallel [(const_int 2) (const_int 3)]))))]
3838 "TARGET_SSE"
3839 "@
3840 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3841 movlps\t{%2, %0|%0, %2}
3842 movlps\t{%2, %0|%0, %2}"
3843 [(set_attr "type" "sselog,ssemov,ssemov")
3844 (set_attr "length_immediate" "1,*,*")
3845 (set_attr "mode" "V4SF,V2SF,V2SF")])
3846
;; Three-operand vmovss between registers.  The vec_merge mask is 1, so
;; element 0 of the result comes from operand 2 and the remaining elements
;; from operand 1.
3847 (define_insn "*avx_movss"
3848 [(set (match_operand:V4SF 0 "register_operand" "=x")
3849 (vec_merge:V4SF
3850 (match_operand:V4SF 2 "register_operand" "x")
3851 (match_operand:V4SF 1 "register_operand" "x")
3852 (const_int 1)))]
3853 "TARGET_AVX"
3854 "vmovss\t{%2, %1, %0|%0, %1, %2}"
3855 [(set_attr "type" "ssemov")
3856 (set_attr "prefix" "vex")
3857 (set_attr "mode" "SF")])
3858
;; Register-to-register movss.  The vec_merge mask is 1, so element 0 of
;; the result comes from operand 2 and the remaining elements from
;; operand 1, which is tied to the destination.
3859 (define_insn "sse_movss"
3860 [(set (match_operand:V4SF 0 "register_operand" "=x")
3861 (vec_merge:V4SF
3862 (match_operand:V4SF 2 "register_operand" "x")
3863 (match_operand:V4SF 1 "register_operand" "0")
3864 (const_int 1)))]
3865 "TARGET_SSE"
3866 "movss\t{%2, %0|%0, %2}"
3867 [(set_attr "type" "ssemov")
3868 (set_attr "mode" "SF")])
3869
;; Broadcast SF operand 1 into all four elements of V4SF operand 0.
(define_expand "vec_dupv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (vec_duplicate:V4SF
          (match_operand:SF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  /* Without AVX the matching insn (*vec_dupv4sf) accepts only a register
     source, so a memory operand has to be loaded first.  Operand 1 is a
     scalar, so the temporary must be created in SFmode; asking force_reg
     for a V4SFmode register would move an SFmode value into a V4SFmode
     pseudo.  */
  if (!TARGET_AVX)
    operands[1] = force_reg (SFmode, operands[1]);
})
3879
;; AVX broadcast of SF operand 1 into a V4SF.
;;   alt 0: source in a register -> vshufps with selector 0 on itself
;;   alt 1: source in memory -> vbroadcastss
3880 (define_insn "*vec_dupv4sf_avx"
3881 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3882 (vec_duplicate:V4SF
3883 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3884 "TARGET_AVX"
3885 "@
3886 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3887 vbroadcastss\t{%1, %0|%0, %1}"
3888 [(set_attr "type" "sselog1,ssemov")
3889 (set_attr "length_immediate" "1,0")
3890 (set_attr "prefix_extra" "0,1")
3891 (set_attr "prefix" "vex")
3892 (set_attr "mode" "V4SF")])
3893
;; SSE broadcast of SF operand 1 into a V4SF.  The source must be a
;; register tied to the destination; shufps with selector 0 on itself
;; copies element 0 into every element.
3894 (define_insn "*vec_dupv4sf"
3895 [(set (match_operand:V4SF 0 "register_operand" "=x")
3896 (vec_duplicate:V4SF
3897 (match_operand:SF 1 "register_operand" "0")))]
3898 "TARGET_SSE"
3899 "shufps\t{$0, %0, %0|%0, %0, 0}"
3900 [(set_attr "type" "sselog1")
3901 (set_attr "length_immediate" "1")
3902 (set_attr "mode" "V4SF")])
3903
;; Build a V2SF from two SF scalars under AVX.
;;   alt 0: both in SSE registers (vunpcklps)
;;   alt 1: operand 2 in memory (vinsertps, selector 0x10)
;;   alt 2: operand 1 in memory, operand 2 matches constraint "C" (vmovss)
;;   alt 3/4: MMX-register ("y") variants using punpckldq / movd; these
;;            keep the non-VEX encoding, hence prefix "orig" for them.
;; NOTE(review): "C" presumably denotes the zero constant -- confirm in
;; constraints.md.
3904 (define_insn "*vec_concatv2sf_avx"
3905 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3906 (vec_concat:V2SF
3907 (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3908 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3909 "TARGET_AVX"
3910 "@
3911 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3912 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3913 vmovss\t{%1, %0|%0, %1}
3914 punpckldq\t{%2, %0|%0, %2}
3915 movd\t{%1, %0|%0, %1}"
3916 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3917 (set_attr "length_immediate" "*,1,*,*,*")
3918 (set_attr "prefix_extra" "*,1,*,*,*")
3919 (set (attr "prefix")
3920 (if_then_else (eq_attr "alternative" "3,4")
3921 (const_string "orig")
3922 (const_string "vex")))
3923 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3924
3925 ;; Although insertps takes register source, we prefer
3926 ;; unpcklps with register source since it is shorter.
;; Build a V2SF from two SF scalars with SSE4.1.  Where operand 1 is a
;; register it is tied to the destination.
;;   alt 0: operand 2 in a register (unpcklps)
;;   alt 1: operand 2 in memory (insertps, selector 0x10)
;;   alt 2: operand 1 in memory, operand 2 matches constraint "C" (movss)
;;   alt 3/4: MMX-register ("y") variants using punpckldq / movd
3927 (define_insn "*vec_concatv2sf_sse4_1"
3928 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3929 (vec_concat:V2SF
3930 (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3931 (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
3932 "TARGET_SSE4_1"
3933 "@
3934 unpcklps\t{%2, %0|%0, %2}
3935 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3936 movss\t{%1, %0|%0, %1}
3937 punpckldq\t{%2, %0|%0, %2}
3938 movd\t{%1, %0|%0, %1}"
3939 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3940 (set_attr "prefix_data16" "*,1,*,*,*")
3941 (set_attr "prefix_extra" "*,1,*,*,*")
3942 (set_attr "length_immediate" "*,1,*,*,*")
3943 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3944
3945 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3946 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3947 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF scalars with plain SSE.  Operand 2 is limited
;; to a register or constraint "C" (predicate reg_or_0_operand).
;;   alt 0: both in SSE registers, operand 1 tied to the destination
;;   alt 1: operand 1 in memory, operand 2 "C" (movss)
;;   alt 2/3: the same two cases in MMX registers (punpckldq / movd)
3948 (define_insn "*vec_concatv2sf_sse"
3949 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3950 (vec_concat:V2SF
3951 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3952 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3953 "TARGET_SSE"
3954 "@
3955 unpcklps\t{%2, %0|%0, %2}
3956 movss\t{%1, %0|%0, %1}
3957 punpckldq\t{%2, %0|%0, %2}
3958 movd\t{%1, %0|%0, %1}"
3959 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3960 (set_attr "mode" "V4SF,SF,DI,DI")])
3961
;; Build a V4SF from two V2SF halves under AVX: operand 1 becomes the low
;; half, operand 2 the high half.
;;   alt 0: operand 2 in a register (vmovlhps)
;;   alt 1: operand 2 in memory (vmovhps)
3962 (define_insn "*vec_concatv4sf_avx"
3963 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3964 (vec_concat:V4SF
3965 (match_operand:V2SF 1 "register_operand" " x,x")
3966 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3967 "TARGET_AVX"
3968 "@
3969 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3970 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3971 [(set_attr "type" "ssemov")
3972 (set_attr "prefix" "vex")
3973 (set_attr "mode" "V4SF,V2SF")])
3974
;; Build a V4SF from two V2SF halves with SSE: operand 1 (tied to the
;; destination) is the low half, operand 2 the high half.
;;   alt 0: operand 2 in a register (movlhps)
;;   alt 1: operand 2 in memory (movhps)
3975 (define_insn "*vec_concatv4sf_sse"
3976 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3977 (vec_concat:V4SF
3978 (match_operand:V2SF 1 "register_operand" " 0,0")
3979 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3980 "TARGET_SSE"
3981 "@
3982 movlhps\t{%2, %0|%0, %2}
3983 movhps\t{%2, %0|%0, %2}"
3984 [(set_attr "type" "ssemov")
3985 (set_attr "mode" "V4SF,V2SF")])
3986
;; Vector initialisation for every mode in SSEMODE; all the work is
;; delegated to ix86_expand_vector_init.
(define_expand "vec_init<mode>"
  [(match_operand:SSEMODE 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx vals = operands[1];

  /* First argument "false": same flag value the original call passes.  */
  ix86_expand_vector_init (false, target, vals);
  DONE;
})
3995
;; AVX: set element 0 of a 4-element vector to scalar operand 2; the
;; vec_merge mask is 1, so the remaining elements come from operand 1.
;;   alt 0-2: operand 1 matches constraint "C"; scalar comes from an SSE
;;            register, memory or a general register respectively
;;   alt 3:   operand 1 in a register, scalar in an SSE register (vmovss)
;;   alt 4:   operand 1 in a register, scalar in a general register or
;;            memory (vpinsrd)
;;   alt 5:   destination is memory tied to operand 1; the template is "#",
;;            so the insn must be split rather than output directly
3996 (define_insn "*vec_set<mode>_0_avx"
3997 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3998 (vec_merge:SSEMODE4S
3999 (vec_duplicate:SSEMODE4S
4000 (match_operand:<ssescalarmode> 2
4001 "general_operand" " x,m,*r,x,*rm,x*rfF"))
4002 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0")
4003 (const_int 1)))]
4004 "TARGET_AVX"
4005 "@
4006 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
4007 vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4008 vmovd\t{%2, %0|%0, %2}
4009 vmovss\t{%2, %1, %0|%0, %1, %2}
4010 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
4011 #"
4012 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
4013 (set_attr "prefix_extra" "*,*,*,*,1,*")
4014 (set_attr "length_immediate" "*,*,*,*,1,*")
4015 (set_attr "prefix" "vex")
4016 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
4017
;; SSE4.1: set element 0 of a 4-element vector to scalar operand 2; the
;; vec_merge mask is 1, so the remaining elements come from operand 1.
;;   alt 0-2: operand 1 matches constraint "C"; scalar comes from an SSE
;;            register, memory or a general register respectively
;;   alt 3:   operand 1 tied to the destination, scalar in an SSE register
;;   alt 4:   operand 1 tied to the destination, scalar in a general
;;            register or memory (pinsrd)
;;   alt 5:   destination is memory tied to operand 1; the template is "#",
;;            so the insn must be split rather than output directly
;; NOTE(review): alternative 5 lists "*rfF" for the scalar while the AVX
;; and SSE2 variants list "x*rfF" -- confirm the omission of "x" is
;; intended.
4018 (define_insn "*vec_set<mode>_0_sse4_1"
4019 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
4020 (vec_merge:SSEMODE4S
4021 (vec_duplicate:SSEMODE4S
4022 (match_operand:<ssescalarmode> 2
4023 "general_operand" " x,m,*r,x,*rm,*rfF"))
4024 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0, 0,0")
4025 (const_int 1)))]
4026 "TARGET_SSE4_1"
4027 "@
4028 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
4029 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4030 movd\t{%2, %0|%0, %2}
4031 movss\t{%2, %0|%0, %2}
4032 pinsrd\t{$0, %2, %0|%0, %2, 0}
4033 #"
4034 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
4035 (set_attr "prefix_extra" "*,*,*,*,1,*")
4036 (set_attr "length_immediate" "*,*,*,*,1,*")
4037 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
4038
;; SSE2: set element 0 of a 4-element vector to scalar operand 2; the
;; vec_merge mask is 1, so the remaining elements come from operand 1.
;;   alt 0/1: operand 1 matches constraint "C"; scalar comes from memory
;;            or a general register
;;   alt 2:   operand 1 tied to the destination, scalar in an SSE register
;;   alt 3:   destination is memory tied to operand 1; the template is "#",
;;            so the insn must be split rather than output directly
4039 (define_insn "*vec_set<mode>_0_sse2"
4040 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x, x,x,m")
4041 (vec_merge:SSEMODE4S
4042 (vec_duplicate:SSEMODE4S
4043 (match_operand:<ssescalarmode> 2
4044 "general_operand" " m,*r,x,x*rfF"))
4045 (match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0")
4046 (const_int 1)))]
4047 "TARGET_SSE2"
4048 "@
4049 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
4050 movd\t{%2, %0|%0, %2}
4051 movss\t{%2, %0|%0, %2}
4052 #"
4053 [(set_attr "type" "ssemov")
4054 (set_attr "mode" "<ssescalarmode>,SI,SF,*")])
4055
;; Plain SSE: set element 0 of a 4-element vector to scalar operand 2; the
;; vec_merge mask is 1, so the remaining elements come from operand 1.
;;   alt 0: operand 1 matches constraint "C", scalar loaded from memory
;;   alt 1: operand 1 tied to the destination, scalar in an SSE register
;;   alt 2: destination is memory tied to operand 1; the template is "#",
;;          so the insn must be split rather than output directly
4056 (define_insn "vec_set<mode>_0"
4057 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x,m")
4058 (vec_merge:SSEMODE4S
4059 (vec_duplicate:SSEMODE4S
4060 (match_operand:<ssescalarmode> 2
4061 "general_operand" " m,x,x*rfF"))
4062 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,0,0")
4063 (const_int 1)))]
4064 "TARGET_SSE"
4065 "@
4066 movss\t{%2, %0|%0, %2}
4067 movss\t{%2, %0|%0, %2}
4068 #"
4069 [(set_attr "type" "ssemov")
4070 (set_attr "mode" "SF,SF,*")])
4071
4072 ;; A subset is vec_setv4sf.
;; AVX: insert SF operand 2 into one element of V4SF operand 1.  Operand 3
;; is the one-hot vec_merge mask (1, 2, 4 or 8) naming the element.
(define_insn "*vec_setv4sf_avx"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 2 "nonimmediate_operand" "xm"))
          (match_operand:V4SF 1 "register_operand" "x")
          (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
  "TARGET_AVX"
{
  /* Convert the one-hot mask to an element index and place it in bits
     4 and up of the vinsertps immediate.  */
  int elt = exact_log2 (INTVAL (operands[3]));

  operands[3] = GEN_INT (elt << 4);
  return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])
4090
;; SSE4.1: insert SF operand 2 into one element of V4SF operand 1, which is
;; tied to the destination.  Operand 3 is the one-hot vec_merge mask
;; (1, 2, 4 or 8) naming the element.
(define_insn "*vec_setv4sf_sse4_1"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 2 "nonimmediate_operand" "xm"))
          (match_operand:V4SF 1 "register_operand" "0")
          (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
  "TARGET_SSE4_1"
{
  /* Convert the one-hot mask to an element index and place it in bits
     4 and up of the insertps immediate.  */
  int elt = exact_log2 (INTVAL (operands[3]));

  operands[3] = GEN_INT (elt << 4);
  return "insertps\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "V4SF")])
4108
;; AVX insertps with an arbitrary 8-bit immediate (operand 3), modelled as
;; UNSPEC_INSERTPS rather than as explicit RTL.
;; NOTE(review): operand 2 accepts memory here ("xm") while
;; sse4_1_insertps only accepts a register -- confirm the asymmetry is
;; intended.
4109 (define_insn "*avx_insertps"
4110 [(set (match_operand:V4SF 0 "register_operand" "=x")
4111 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
4112 (match_operand:V4SF 1 "register_operand" "x")
4113 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4114 UNSPEC_INSERTPS))]
4115 "TARGET_AVX"
4116 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4117 [(set_attr "type" "sselog")
4118 (set_attr "prefix" "vex")
4119 (set_attr "prefix_extra" "1")
4120 (set_attr "length_immediate" "1")
4121 (set_attr "mode" "V4SF")])
4122
;; SSE4.1 insertps with an arbitrary 8-bit immediate (operand 3), modelled
;; as UNSPEC_INSERTPS rather than as explicit RTL.  Both vector inputs must
;; be registers; operand 1 is tied to the destination.
4123 (define_insn "sse4_1_insertps"
4124 [(set (match_operand:V4SF 0 "register_operand" "=x")
4125 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
4126 (match_operand:V4SF 1 "register_operand" "0")
4127 (match_operand:SI 3 "const_0_to_255_operand" "n")]
4128 UNSPEC_INSERTPS))]
4129 "TARGET_SSE4_1"
4130 "insertps\t{%3, %2, %0|%0, %2, %3}";
4131 [(set_attr "type" "sselog")
4132 (set_attr "prefix_data16" "1")
4133 (set_attr "prefix_extra" "1")
4134 (set_attr "length_immediate" "1")
4135 (set_attr "mode" "V4SF")])
4136
;; After reload, setting element 0 of a vector that lives in memory (the
;; merge source is the destination itself) is just a scalar store to the
;; start of that memory.
(define_split
  [(set (match_operand:SSEMODE4S 0 "memory_operand" "")
        (vec_merge:SSEMODE4S
          (vec_duplicate:SSEMODE4S
            (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
          (match_dup 0)
          (const_int 1)))]
  "TARGET_SSE && reload_completed"
  [(const_int 0)]
{
  /* Scalar-mode view of the first element (offset 0).  */
  rtx slot = adjust_address (operands[0], <ssescalarmode>mode, 0);

  emit_move_insn (slot, operands[1]);
  DONE;
})
4151
;; Set one element of a vector: operand 0 is the vector, operand 1 the new
;; scalar value, operand 2 the constant element index.  All the work is
;; delegated to ix86_expand_vector_set.
(define_expand "vec_set<mode>"
  [(match_operand:SSEMODE 0 "register_operand" "")
   (match_operand:<ssescalarmode> 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx val = operands[1];

  ix86_expand_vector_set (false, target, val, INTVAL (operands[2]));
  DONE;
})
4162
;; Extract element 0 of a V4SF.  Never output directly ("#"): after reload
;; it is split into a plain SFmode move from the low part of operand 1.
;; Memory-to-memory is excluded by the insn condition.
(define_insn_and_split "*vec_extractv4sf_0"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
        (vec_select:SF
          (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* A hard register is re-created in SFmode with the same number;
     anything else goes through gen_lowpart.  */
  rtx low = (REG_P (operands[1])
             ? gen_rtx_REG (SFmode, REGNO (operands[1]))
             : gen_lowpart (SFmode, operands[1]));

  emit_move_insn (operands[0], low);
  DONE;
})
4181
;; Extract one half of a 256-bit vector.  Operand 2 (0 or 1) chooses
;; between the vec_extract_lo_<mode> and vec_extract_hi_<mode> patterns.
(define_expand "avx_vextractf128<mode>"
  [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
   (match_operand:AVX256MODE 1 "register_operand" "")
   (match_operand:SI 2 "const_0_to_1_operand" "")]
  "TARGET_AVX"
{
  const HOST_WIDE_INT half = INTVAL (operands[2]);

  if (half == 0)
    emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
  else if (half == 1)
    emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
  else
    /* The predicate only admits 0 and 1.  */
    gcc_unreachable ();

  DONE;
})
4201
;; Extract elements 0 and 1 (the low half) of a 4-element 256-bit vector.
;; Never output directly ("#"): after reload it is split into a plain move
;; of the low part of operand 1 in the half-width mode.
(define_insn_and_split "vec_extract_lo_<mode>"
  [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
        (vec_select:<avxhalfvecmode>
          (match_operand:AVX256MODE4P 1 "nonimmediate_operand" "xm,x")
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_AVX"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* A hard register is re-created in the half-width mode with the same
     number; anything else goes through gen_lowpart.  */
  rtx low = (REG_P (operands[1])
             ? gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[1]))
             : gen_lowpart (<avxhalfvecmode>mode, operands[1]));

  emit_move_insn (operands[0], low);
  DONE;
})
4220
;; Extract elements 2 and 3 (the high half) of a 4-element 256-bit vector
;; with vextractf128 $1.  The destination may be a register or memory; the
;; "memory" attribute is set per alternative accordingly.
4221 (define_insn "vec_extract_hi_<mode>"
4222 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4223 (vec_select:<avxhalfvecmode>
4224 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4225 (parallel [(const_int 2) (const_int 3)])))]
4226 "TARGET_AVX"
4227 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4228 [(set_attr "type" "sselog")
4229 (set_attr "prefix_extra" "1")
4230 (set_attr "length_immediate" "1")
4231 (set_attr "memory" "none,store")
4232 (set_attr "prefix" "vex")
4233 (set_attr "mode" "V8SF")])
4234
;; Extract elements 0..3 (the low half) of an 8-element 256-bit vector.
;; Never output directly ("#"): after reload it is split into a plain move
;; of the low part of operand 1 in the half-width mode.
(define_insn_and_split "vec_extract_lo_<mode>"
  [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
        (vec_select:<avxhalfvecmode>
          (match_operand:AVX256MODE8P 1 "nonimmediate_operand" "xm,x")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "TARGET_AVX"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* A hard register is re-created in the half-width mode with the same
     number; anything else goes through gen_lowpart.  */
  rtx low = (REG_P (operands[1])
             ? gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[1]))
             : gen_lowpart (<avxhalfvecmode>mode, operands[1]));

  emit_move_insn (operands[0], low);
  DONE;
})
4254
;; Extract elements 4..7 (the high half) of an 8-element 256-bit vector
;; with vextractf128 $1.  The destination may be a register or memory; the
;; "memory" attribute is set per alternative accordingly.
4255 (define_insn "vec_extract_hi_<mode>"
4256 [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4257 (vec_select:<avxhalfvecmode>
4258 (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4259 (parallel [(const_int 4) (const_int 5)
4260 (const_int 6) (const_int 7)])))]
4261 "TARGET_AVX"
4262 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4263 [(set_attr "type" "sselog")
4264 (set_attr "prefix_extra" "1")
4265 (set_attr "length_immediate" "1")
4266 (set_attr "memory" "none,store")
4267 (set_attr "prefix" "vex")
4268 (set_attr "mode" "V8SF")])
4269
;; Extract elements 0..7 (the low half) of a V16HI as a V8HI.  Never output
;; directly ("#"): after reload it is split into a plain V8HImode move of
;; the low part of operand 1.
(define_insn_and_split "vec_extract_lo_v16hi"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
        (vec_select:V8HI
          (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "TARGET_AVX"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* A hard register is re-created in V8HImode with the same number;
     anything else goes through gen_lowpart.  */
  rtx low = (REG_P (operands[1])
             ? gen_rtx_REG (V8HImode, REGNO (operands[1]))
             : gen_lowpart (V8HImode, operands[1]));

  emit_move_insn (operands[0], low);
  DONE;
})
4291
;; Extract elements 8..15 (the high half) of a V16HI as a V8HI with
;; vextractf128 $1.  The destination may be a register or memory.
4292 (define_insn "vec_extract_hi_v16hi"
4293 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4294 (vec_select:V8HI
4295 (match_operand:V16HI 1 "register_operand" "x,x")
4296 (parallel [(const_int 8) (const_int 9)
4297 (const_int 10) (const_int 11)
4298 (const_int 12) (const_int 13)
4299 (const_int 14) (const_int 15)])))]
4300 "TARGET_AVX"
4301 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4302 [(set_attr "type" "sselog")
4303 (set_attr "prefix_extra" "1")
4304 (set_attr "length_immediate" "1")
4305 (set_attr "memory" "none,store")
4306 (set_attr "prefix" "vex")
4307 (set_attr "mode" "V8SF")])
4308
;; Extract elements 0..15 (the low half) of a V32QI as a V16QI.  Never
;; output directly ("#"): after reload it is split into a plain V16QImode
;; move of the low part of operand 1.
(define_insn_and_split "vec_extract_lo_v32qi"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
        (vec_select:V16QI
          (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)])))]
  "TARGET_AVX"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* A hard register is re-created in V16QImode with the same number;
     anything else goes through gen_lowpart.  */
  rtx low = (REG_P (operands[1])
             ? gen_rtx_REG (V16QImode, REGNO (operands[1]))
             : gen_lowpart (V16QImode, operands[1]));

  emit_move_insn (operands[0], low);
  DONE;
})
4334
;; Extract elements 16..31 (the high half) of a V32QI as a V16QI with
;; vextractf128 $1.  The destination may be a register or memory.
4335 (define_insn "vec_extract_hi_v32qi"
4336 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4337 (vec_select:V16QI
4338 (match_operand:V32QI 1 "register_operand" "x,x")
4339 (parallel [(const_int 16) (const_int 17)
4340 (const_int 18) (const_int 19)
4341 (const_int 20) (const_int 21)
4342 (const_int 22) (const_int 23)
4343 (const_int 24) (const_int 25)
4344 (const_int 26) (const_int 27)
4345 (const_int 28) (const_int 29)
4346 (const_int 30) (const_int 31)])))]
4347 "TARGET_AVX"
4348 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4349 [(set_attr "type" "sselog")
4350 (set_attr "prefix_extra" "1")
4351 (set_attr "length_immediate" "1")
4352 (set_attr "memory" "none,store")
4353 (set_attr "prefix" "vex")
4354 (set_attr "mode" "V8SF")])
4355
;; Extract SF element number operand 2 (0..3) of V4SF register operand 1
;; into a general register or memory ("rm") with extractps.
4356 (define_insn "*sse4_1_extractps"
4357 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4358 (vec_select:SF
4359 (match_operand:V4SF 1 "register_operand" "x")
4360 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4361 "TARGET_SSE4_1"
4362 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4363 [(set_attr "type" "sselog")
4364 (set_attr "prefix_data16" "1")
4365 (set_attr "prefix_extra" "1")
4366 (set_attr "length_immediate" "1")
4367 (set_attr "prefix" "maybe_vex")
4368 (set_attr "mode" "V4SF")])
4369
;; Extract one SF element from a V4SF that lives in offsettable memory.
;; Never output directly ("#"): after reload it is split into a scalar
;; load from the element's address.
(define_insn_and_split "*vec_extract_v4sf_mem"
  [(set (match_operand:SF 0 "register_operand" "=x*rf")
        (vec_select:SF
          (match_operand:V4SF 1 "memory_operand" "o")
          (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
{
  /* Each SF element is 4 bytes wide.  */
  int elt = INTVAL (operands[2]);
  rtx slot = adjust_address (operands[1], SFmode, elt * 4);

  emit_move_insn (operands[0], slot);
  DONE;
})
4385
;; Extract one element of a vector: operand 0 receives the scalar,
;; operand 1 is the vector, operand 2 the constant element index.  All the
;; work is delegated to ix86_expand_vector_extract.
(define_expand "vec_extract<mode>"
  [(match_operand:<avxscalarmode> 0 "register_operand" "")
   (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx vec = operands[1];

  ix86_expand_vector_extract (false, target, vec, INTVAL (operands[2]));
  DONE;
})
4396
4397 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4398 ;;
4399 ;; Parallel double-precision floating point element swizzling
4400 ;;
4401 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4402
4403 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd.  With operand 2 starting at index 4 of the V8DF
;; concatenation, the result is {op1[1], op2[1], op1[3], op2[3]}.
4404 (define_insn "avx_unpckhpd256"
4405 [(set (match_operand:V4DF 0 "register_operand" "=x")
4406 (vec_select:V4DF
4407 (vec_concat:V8DF
4408 (match_operand:V4DF 1 "register_operand" "x")
4409 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4410 (parallel [(const_int 1) (const_int 5)
4411 (const_int 3) (const_int 7)])))]
4412 "TARGET_AVX"
4413 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4414 [(set_attr "type" "sselog")
4415 (set_attr "prefix" "vex")
4416 (set_attr "mode" "V4DF")])
4417
;; Interleave the high halves of two V4DF vectors in three steps:
;;   operand 3 (temp) = elements {0,4,2,6} of the op1/op2 concatenation
;;   operand 4 (temp) = elements {1,5,3,7} of the op1/op2 concatenation
;;   operand 0        = elements {2,3,6,7} of the temp3/temp4
;;                      concatenation, i.e. the upper half of each temp.
;; The preparation code only allocates the two temporaries.
4418 (define_expand "vec_interleave_highv4df"
4419 [(set (match_dup 3)
4420 (vec_select:V4DF
4421 (vec_concat:V8DF
4422 (match_operand:V4DF 1 "register_operand" "x")
4423 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4424 (parallel [(const_int 0) (const_int 4)
4425 (const_int 2) (const_int 6)])))
4426 (set (match_dup 4)
4427 (vec_select:V4DF
4428 (vec_concat:V8DF
4429 (match_dup 1)
4430 (match_dup 2))
4431 (parallel [(const_int 1) (const_int 5)
4432 (const_int 3) (const_int 7)])))
4433 (set (match_operand:V4DF 0 "register_operand" "")
4434 (vec_select:V4DF
4435 (vec_concat:V8DF
4436 (match_dup 3)
4437 (match_dup 4))
4438 (parallel [(const_int 2) (const_int 3)
4439 (const_int 6) (const_int 7)])))]
4440 "TARGET_AVX"
4441 {
4442 operands[3] = gen_reg_rtx (V4DFmode);
4443 operands[4] = gen_reg_rtx (V4DFmode);
4444 })
4445
4446
;; Interleave the high elements of two V2DF vectors: result is
;; {op1[1], op2[1]}.  If ix86_vec_interleave_v2df_operator_ok rejects the
;; operand combination, operand 2 is forced into a register.
4447 (define_expand "vec_interleave_highv2df"
4448 [(set (match_operand:V2DF 0 "register_operand" "")
4449 (vec_select:V2DF
4450 (vec_concat:V4DF
4451 (match_operand:V2DF 1 "nonimmediate_operand" "")
4452 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4453 (parallel [(const_int 1)
4454 (const_int 3)])))]
4455 "TARGET_SSE2"
4456 {
4457 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4458 operands[2] = force_reg (V2DFmode, operands[2]);
4459 })
4460
;; AVX: result is {op1[1], op2[1]}.
;;   alt 0: both in registers (vunpckhpd)
;;   alt 1: operand 1 in offsettable memory and operand 2 identical to it;
;;          broadcast the high element (vmovddup on %H1)
;;   alt 2: operand 1 in offsettable memory, operand 2 in a register
;;          (vmovlpd loads %H1 into the low element)
;;   alt 3: destination is memory tied to operand 2; store operand 1 with
;;          vmovhpd
4461 (define_insn "*avx_interleave_highv2df"
4462 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4463 (vec_select:V2DF
4464 (vec_concat:V4DF
4465 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,o,x")
4466 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,x,0"))
4467 (parallel [(const_int 1)
4468 (const_int 3)])))]
4469 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4470 "@
4471 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4472 vmovddup\t{%H1, %0|%0, %H1}
4473 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4474 vmovhpd\t{%1, %0|%0, %1}"
4475 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4476 (set_attr "prefix" "vex")
4477 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
4478
;; SSE3: result is {op1[1], op2[1]}.
;;   alt 0: operand 1 tied to the destination, operand 2 in a register
;;          (unpckhpd)
;;   alt 1: operand 1 in offsettable memory and operand 2 identical to it;
;;          broadcast the high element (movddup on %H1)
;;   alt 2: operand 1 in offsettable memory, operand 2 tied to the
;;          destination (movlpd loads %H1 into the low element)
;;   alt 3: destination is memory tied to operand 2; store operand 1 with
;;          movhpd
4479 (define_insn "*sse3_interleave_highv2df"
4480 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4481 (vec_select:V2DF
4482 (vec_concat:V4DF
4483 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x")
4484 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0"))
4485 (parallel [(const_int 1)
4486 (const_int 3)])))]
4487 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4488 "@
4489 unpckhpd\t{%2, %0|%0, %2}
4490 movddup\t{%H1, %0|%0, %H1}
4491 movlpd\t{%H1, %0|%0, %H1}
4492 movhpd\t{%1, %0|%0, %1}"
4493 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4494 (set_attr "prefix_data16" "*,*,1,1")
4495 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
4496
;; SSE2: result is {op1[1], op2[1]}.
;;   alt 0: operand 1 tied to the destination, operand 2 in a register
;;          (unpckhpd)
;;   alt 1: operand 1 in offsettable memory, operand 2 tied to the
;;          destination (movlpd loads %H1 into the low element)
;;   alt 2: destination is memory tied to operand 2; store operand 1 with
;;          movhpd
4497 (define_insn "*sse2_interleave_highv2df"
4498 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
4499 (vec_select:V2DF
4500 (vec_concat:V4DF
4501 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4502 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4503 (parallel [(const_int 1)
4504 (const_int 3)])))]
4505 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4506 "@
4507 unpckhpd\t{%2, %0|%0, %2}
4508 movlpd\t{%H1, %0|%0, %H1}
4509 movhpd\t{%1, %0|%0, %1}"
4510 [(set_attr "type" "sselog,ssemov,ssemov")
4511 (set_attr "prefix_data16" "*,1,1")
4512 (set_attr "mode" "V2DF,V1DF,V1DF")])
4513
4514 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander for the 256-bit movddup: operand 1 is concatenated with itself
;; and elements 0, 4, 2, 6 are selected, so elements 0 and 2 of the input
;; each appear twice in the result.  There is no preparation code; the
;; RTL is emitted as written.
4515 (define_expand "avx_movddup256"
4516 [(set (match_operand:V4DF 0 "register_operand" "")
4517 (vec_select:V4DF
4518 (vec_concat:V8DF
4519 (match_operand:V4DF 1 "nonimmediate_operand" "")
4520 (match_dup 1))
4521 (parallel [(const_int 0) (const_int 4)
4522 (const_int 2) (const_int 6)])))]
4523 "TARGET_AVX")
4524
;; Expander for the 256-bit unpcklpd: selects elements 0, 4, 2, 6 of the
;; V8DF concatenation of operands 1 and 2.  Operand 1 must be a register
;; at expand time; operand 2 may be memory.
4525 (define_expand "avx_unpcklpd256"
4526 [(set (match_operand:V4DF 0 "register_operand" "")
4527 (vec_select:V4DF
4528 (vec_concat:V8DF
4529 (match_operand:V4DF 1 "register_operand" "")
4530 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4531 (parallel [(const_int 0) (const_int 4)
4532 (const_int 2) (const_int 6)])))]
4533 "TARGET_AVX")
4534
;; Insn matching both 256-bit expanders above (selection 0, 4, 2, 6).
;; Alternative 0 has operand 2 tied to operand 1 and emits vmovddup;
;; alternative 1 is the general vunpcklpd.  The insn condition only
;; accepts a memory operand 1 when both inputs are the same rtx, i.e. in
;; the vmovddup case.
4535 (define_insn "*avx_unpcklpd256"
4536 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4537 (vec_select:V4DF
4538 (vec_concat:V8DF
4539 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4540 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4541 (parallel [(const_int 0) (const_int 4)
4542 (const_int 2) (const_int 6)])))]
4543 "TARGET_AVX
4544 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4545 "@
4546 vmovddup\t{%1, %0|%0, %1}
4547 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4548 [(set_attr "type" "sselog")
4549 (set_attr "prefix" "vex")
4550 (set_attr "mode" "V4DF")])
4551
;; Expander for the V4DF low interleave, built from three sets:
;;   operand 3 = elements 0, 4, 2, 6 of (operand 1, operand 2)
;;   operand 4 = elements 1, 5, 3, 7 of (operand 1, operand 2)
;;   operand 0 = elements 0, 1, 4, 5 of (operand 3, operand 4)
;; Operands 3 and 4 are fresh V4DF pseudo registers created in the
;; preparation code.
4552 (define_expand "vec_interleave_lowv4df"
4553 [(set (match_dup 3)
4554 (vec_select:V4DF
4555 (vec_concat:V8DF
4556 (match_operand:V4DF 1 "register_operand" "x")
4557 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4558 (parallel [(const_int 0) (const_int 4)
4559 (const_int 2) (const_int 6)])))
4560 (set (match_dup 4)
4561 (vec_select:V4DF
4562 (vec_concat:V8DF
4563 (match_dup 1)
4564 (match_dup 2))
4565 (parallel [(const_int 1) (const_int 5)
4566 (const_int 3) (const_int 7)])))
4567 (set (match_operand:V4DF 0 "register_operand" "")
4568 (vec_select:V4DF
4569 (vec_concat:V8DF
4570 (match_dup 3)
4571 (match_dup 4))
4572 (parallel [(const_int 0) (const_int 1)
4573 (const_int 4) (const_int 5)])))]
4574 "TARGET_AVX"
4575 {
4576 operands[3] = gen_reg_rtx (V4DFmode);
4577 operands[4] = gen_reg_rtx (V4DFmode);
4578 })
4579
;; Expander for the V2DF low interleave: the result selects elements 0
;; and 2 of the V4DF concatenation of operands 1 and 2.  When
;; ix86_vec_interleave_v2df_operator_ok rejects the operand combination,
;; operand 1 is forced into a register (the high variant forces operand 2
;; instead).
4580 (define_expand "vec_interleave_lowv2df"
4581 [(set (match_operand:V2DF 0 "register_operand" "")
4582 (vec_select:V2DF
4583 (vec_concat:V4DF
4584 (match_operand:V2DF 1 "nonimmediate_operand" "")
4585 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4586 (parallel [(const_int 0)
4587 (const_int 2)])))]
4588 "TARGET_SSE2"
4589 {
4590 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4591 operands[1] = force_reg (V2DFmode, operands[1]);
4592 })
4593
;; AVX form of the V2DF low interleave (elements 0 and 2 of the
;; concatenation of operands 1 and 2).  Alternatives, in order:
;;   0: all registers, vunpcklpd.
;;   1: operand 1 in memory and operand 2 tied to it, vmovddup.
;;   2: operand 2 in memory, vmovhpd.
;;   3: offsettable memory destination tied to operand 1, vmovlpd store of
;;      operand 2 to %H0.
4594 (define_insn "*avx_interleave_lowv2df"
4595 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4596 (vec_select:V2DF
4597 (vec_concat:V4DF
4598 (match_operand:V2DF 1 "nonimmediate_operand" " x,m,x,0")
4599 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4600 (parallel [(const_int 0)
4601 (const_int 2)])))]
4602 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4603 "@
4604 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4605 vmovddup\t{%1, %0|%0, %1}
4606 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4607 vmovlpd\t{%2, %H0|%H0, %2}"
4608 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4609 (set_attr "prefix" "vex")
4610 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
4611
;; SSE3 (two-operand) form of the V2DF low interleave.  Same four
;; alternatives as the AVX pattern; operand 1 is tied to the destination
;; in every alternative except the movddup one, where operand 1 is memory
;; and operand 2 is tied to it.
4612 (define_insn "*sse3_interleave_lowv2df"
4613 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4614 (vec_select:V2DF
4615 (vec_concat:V4DF
4616 (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0")
4617 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4618 (parallel [(const_int 0)
4619 (const_int 2)])))]
4620 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4621 "@
4622 unpcklpd\t{%2, %0|%0, %2}
4623 movddup\t{%1, %0|%0, %1}
4624 movhpd\t{%2, %0|%0, %2}
4625 movlpd\t{%2, %H0|%H0, %2}"
4626 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4627 (set_attr "prefix_data16" "*,*,1,1")
4628 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
4629
;; SSE2 form of the V2DF low interleave.  Operand 1 is tied to the
;; destination in all three alternatives: unpcklpd from a register, movhpd
;; from memory, and a movlpd store of operand 2 to %H0 when the
;; destination is offsettable memory.
4630 (define_insn "*sse2_interleave_lowv2df"
4631 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4632 (vec_select:V2DF
4633 (vec_concat:V4DF
4634 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4635 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4636 (parallel [(const_int 0)
4637 (const_int 2)])))]
4638 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4639 "@
4640 unpcklpd\t{%2, %0|%0, %2}
4641 movhpd\t{%2, %0|%0, %2}
4642 movlpd\t{%2, %H0|%H0, %2}"
4643 [(set_attr "type" "sselog,ssemov,ssemov")
4644 (set_attr "prefix_data16" "*,1,1")
4645 (set_attr "mode" "V2DF,V1DF,V1DF")])
4646
;; After reload on SSE3 targets: a store to memory of a V2DF whose two
;; elements are both element 0 of register operand 1 is replaced by two
;; DFmode stores of that register's low part, at byte offsets 0 and 8 of
;; the destination.
4647 (define_split
4648 [(set (match_operand:V2DF 0 "memory_operand" "")
4649 (vec_select:V2DF
4650 (vec_concat:V4DF
4651 (match_operand:V2DF 1 "register_operand" "")
4652 (match_dup 1))
4653 (parallel [(const_int 0)
4654 (const_int 2)])))]
4655 "TARGET_SSE3 && reload_completed"
4656 [(const_int 0)]
4657 {
4658 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4659 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4660 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
4661 DONE;
4662 })
4663
;; On SSE3 targets: selecting indices N and N+2 (N is 0 or 1) from memory
;; operand 1 concatenated with itself picks the same element twice.  The
;; split rewrites this as a vec_duplicate of the DFmode value at byte
;; offset N*8 of that memory operand.
4664 (define_split
4665 [(set (match_operand:V2DF 0 "register_operand" "")
4666 (vec_select:V2DF
4667 (vec_concat:V4DF
4668 (match_operand:V2DF 1 "memory_operand" "")
4669 (match_dup 1))
4670 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4671 (match_operand:SI 3 "const_int_operand" "")])))]
4672 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4673 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4674 {
4675 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
4676 })
4677
;; Expander taking the 256-bit shufpd immediate as operand 3.  The low
;; four mask bits are decoded into the four element indices expected by
;; avx_shufpd256_1:
;;   bit 0 -> 0 or 1,  bit 1 -> 4 or 5,  bit 2 -> 2 or 3,  bit 3 -> 6 or 7
;; (indices refer to the V8DF concatenation of operands 1 and 2).
4678 (define_expand "avx_shufpd256"
4679 [(match_operand:V4DF 0 "register_operand" "")
4680 (match_operand:V4DF 1 "register_operand" "")
4681 (match_operand:V4DF 2 "nonimmediate_operand" "")
4682 (match_operand:SI 3 "const_int_operand" "")]
4683 "TARGET_AVX"
4684 {
4685 int mask = INTVAL (operands[3]);
4686 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4687 GEN_INT (mask & 1),
4688 GEN_INT (mask & 2 ? 5 : 4),
4689 GEN_INT (mask & 4 ? 3 : 2),
4690 GEN_INT (mask & 8 ? 7 : 6)));
4691 DONE;
4692 })
4693
;; 256-bit shufpd insn with the selection spelled out as four element
;; indices (operands 3..6), each restricted by its predicate to one of
;; two values.  The output code folds the indices back into the 4-bit
;; immediate (one bit per index, relative to the lower value of its
;; range), overwrites operand 3 with it and emits vshufpd.
4694 (define_insn "avx_shufpd256_1"
4695 [(set (match_operand:V4DF 0 "register_operand" "=x")
4696 (vec_select:V4DF
4697 (vec_concat:V8DF
4698 (match_operand:V4DF 1 "register_operand" "x")
4699 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4700 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4701 (match_operand 4 "const_4_to_5_operand" "")
4702 (match_operand 5 "const_2_to_3_operand" "")
4703 (match_operand 6 "const_6_to_7_operand" "")])))]
4704 "TARGET_AVX"
4705 {
4706 int mask;
4707 mask = INTVAL (operands[3]);
4708 mask |= (INTVAL (operands[4]) - 4) << 1;
4709 mask |= (INTVAL (operands[5]) - 2) << 2;
4710 mask |= (INTVAL (operands[6]) - 6) << 3;
4711 operands[3] = GEN_INT (mask);
4712
4713 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4714 }
4715 [(set_attr "type" "sselog")
4716 (set_attr "length_immediate" "1")
4717 (set_attr "prefix" "vex")
4718 (set_attr "mode" "V4DF")])
4719
;; Expander taking the 128-bit shufpd immediate as operand 3.  Bit 0 is
;; decoded to index 0 or 1 and bit 1 to index 2 or 3 (indices into the
;; V4DF concatenation of operands 1 and 2), then passed to
;; gen_sse2_shufpd_v2df.
4720 (define_expand "sse2_shufpd"
4721 [(match_operand:V2DF 0 "register_operand" "")
4722 (match_operand:V2DF 1 "register_operand" "")
4723 (match_operand:V2DF 2 "nonimmediate_operand" "")
4724 (match_operand:SI 3 "const_int_operand" "")]
4725 "TARGET_SSE2"
4726 {
4727 int mask = INTVAL (operands[3]);
4728 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4729 GEN_INT (mask & 1),
4730 GEN_INT (mask & 2 ? 3 : 2)));
4731 DONE;
4732 })
4733
;; Expander for every mode in the SSEMODE_EO iterator.  All the work is
;; delegated to ix86_expand_vec_extract_even_odd, called with a final
;; argument of 0 (the odd variant passes 1).  The condition string is
;; empty, so the expander itself adds no target check.
4734 (define_expand "vec_extract_even<mode>"
4735 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4736 (match_operand:SSEMODE_EO 1 "register_operand" "")
4737 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4738 ""
4739 {
4740 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
4741 DONE;
4742 })
4743
;; Expander for every mode in the SSEMODE_EO iterator.  All the work is
;; delegated to ix86_expand_vec_extract_even_odd, called with a final
;; argument of 1 (the even variant passes 0).  The condition string is
;; empty, so the expander itself adds no target check.
4744 (define_expand "vec_extract_odd<mode>"
4745 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4746 (match_operand:SSEMODE_EO 1 "register_operand" "")
4747 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4748 ""
4749 {
4750 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4751 DONE;
4752 })
4753
4754 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX three-operand form of the V2DI high interleave: selects elements 1
;; and 3 of the V4DI concatenation of operands 1 and 2 with vpunpckhqdq.
4755 (define_insn "*avx_interleave_highv2di"
4756 [(set (match_operand:V2DI 0 "register_operand" "=x")
4757 (vec_select:V2DI
4758 (vec_concat:V4DI
4759 (match_operand:V2DI 1 "register_operand" "x")
4760 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4761 (parallel [(const_int 1)
4762 (const_int 3)])))]
4763 "TARGET_AVX"
4764 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4765 [(set_attr "type" "sselog")
4766 (set_attr "prefix" "vex")
4767 (set_attr "mode" "TI")])
4768
;; SSE2 two-operand form of the V2DI high interleave (elements 1 and 3 of
;; the concatenation).  Operand 1 is tied to the destination; emitted as
;; punpckhqdq.
4769 (define_insn "vec_interleave_highv2di"
4770 [(set (match_operand:V2DI 0 "register_operand" "=x")
4771 (vec_select:V2DI
4772 (vec_concat:V4DI
4773 (match_operand:V2DI 1 "register_operand" "0")
4774 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4775 (parallel [(const_int 1)
4776 (const_int 3)])))]
4777 "TARGET_SSE2"
4778 "punpckhqdq\t{%2, %0|%0, %2}"
4779 [(set_attr "type" "sselog")
4780 (set_attr "prefix_data16" "1")
4781 (set_attr "mode" "TI")])
4782
;; AVX three-operand form of the V2DI low interleave: selects elements 0
;; and 2 of the V4DI concatenation of operands 1 and 2 with vpunpcklqdq.
4783 (define_insn "*avx_interleave_lowv2di"
4784 [(set (match_operand:V2DI 0 "register_operand" "=x")
4785 (vec_select:V2DI
4786 (vec_concat:V4DI
4787 (match_operand:V2DI 1 "register_operand" "x")
4788 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4789 (parallel [(const_int 0)
4790 (const_int 2)])))]
4791 "TARGET_AVX"
4792 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4793 [(set_attr "type" "sselog")
4794 (set_attr "prefix" "vex")
4795 (set_attr "mode" "TI")])
4796
;; SSE2 two-operand form of the V2DI low interleave (elements 0 and 2 of
;; the concatenation).  Operand 1 is tied to the destination; emitted as
;; punpcklqdq.
4797 (define_insn "vec_interleave_lowv2di"
4798 [(set (match_operand:V2DI 0 "register_operand" "=x")
4799 (vec_select:V2DI
4800 (vec_concat:V4DI
4801 (match_operand:V2DI 1 "register_operand" "0")
4802 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4803 (parallel [(const_int 0)
4804 (const_int 2)])))]
4805 "TARGET_SSE2"
4806 "punpcklqdq\t{%2, %0|%0, %2}"
4807 [(set_attr "type" "sselog")
4808 (set_attr "prefix_data16" "1")
4809 (set_attr "mode" "TI")])
4810
;; AVX three-operand shufpd for every mode in SSEMODE2D.  Operand 3
;; (0 or 1) picks the result's element 0 from operand 1 and operand 4
;; (2 or 3) picks element 1 from operand 2.  The output code rebuilds
;; the 2-bit immediate from those indices and stores it in operand 3.
4811 (define_insn "*avx_shufpd_<mode>"
4812 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4813 (vec_select:SSEMODE2D
4814 (vec_concat:<ssedoublesizemode>
4815 (match_operand:SSEMODE2D 1 "register_operand" "x")
4816 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4817 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4818 (match_operand 4 "const_2_to_3_operand" "")])))]
4819 "TARGET_AVX"
4820 {
4821 int mask;
4822 mask = INTVAL (operands[3]);
4823 mask |= (INTVAL (operands[4]) - 2) << 1;
4824 operands[3] = GEN_INT (mask);
4825
4826 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4827 }
4828 [(set_attr "type" "sselog")
4829 (set_attr "length_immediate" "1")
4830 (set_attr "prefix" "vex")
4831 (set_attr "mode" "V2DF")])
4832
;; SSE2 two-operand shufpd for every mode in SSEMODE2D; operand 1 is tied
;; to the destination.  Operand 3 (0 or 1) picks the result's element 0
;; from operand 1 and operand 4 (2 or 3) picks element 1 from operand 2.
;; The output code rebuilds the 2-bit immediate and stores it in
;; operand 3.
4833 (define_insn "sse2_shufpd_<mode>"
4834 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4835 (vec_select:SSEMODE2D
4836 (vec_concat:<ssedoublesizemode>
4837 (match_operand:SSEMODE2D 1 "register_operand" "0")
4838 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4839 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4840 (match_operand 4 "const_2_to_3_operand" "")])))]
4841 "TARGET_SSE2"
4842 {
4843 int mask;
4844 mask = INTVAL (operands[3]);
4845 mask |= (INTVAL (operands[4]) - 2) << 1;
4846 operands[3] = GEN_INT (mask);
4847
4848 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4849 }
4850 [(set_attr "type" "sselog")
4851 (set_attr "length_immediate" "1")
4852 (set_attr "mode" "V2DF")])
4853
4854 ;; Avoid combining registers from different units in a single alternative,
4855 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX extraction of element 1 of a V2DF into a DF destination.
;; Alternative 0 stores to memory with vmovhpd, alternative 1 stays in
;; SSE registers with vunpckhpd, and the three alternatives whose source
;; is offsettable memory emit "#" and are left to a splitter.  The
;; condition rejects memory-to-memory.
4856 (define_insn "*avx_storehpd"
4857 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4858 (vec_select:DF
4859 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4860 (parallel [(const_int 1)])))]
4861 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4862 "@
4863 vmovhpd\t{%1, %0|%0, %1}
4864 vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4865 #
4866 #
4867 #"
4868 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4869 (set_attr "prefix" "vex")
4870 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4871
;; SSE2 extraction of element 1 of a V2DF into a DF destination.
;; Alternative 0 stores to memory with movhpd; alternative 1 has the
;; source tied to the destination register and emits unpckhpd %0, %0; the
;; three memory-source alternatives emit "#" and are left to a splitter.
;; The condition rejects memory-to-memory.
4872 (define_insn "sse2_storehpd"
4873 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4874 (vec_select:DF
4875 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4876 (parallel [(const_int 1)])))]
4877 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4878 "@
4879 movhpd\t{%1, %0|%0, %1}
4880 unpckhpd\t%0, %0
4881 #
4882 #
4883 #"
4884 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4885 (set_attr "prefix_data16" "1,*,*,*,*")
4886 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4887
;; After reload: extracting element 1 of a V2DF held in memory into a
;; register becomes a plain DFmode load from byte offset 8 of that
;; memory operand.
4888 (define_split
4889 [(set (match_operand:DF 0 "register_operand" "")
4890 (vec_select:DF
4891 (match_operand:V2DF 1 "memory_operand" "")
4892 (parallel [(const_int 1)])))]
4893 "TARGET_SSE2 && reload_completed"
4894 [(set (match_dup 0) (match_dup 1))]
4895 "operands[1] = adjust_address (operands[1], DFmode, 8);")
4896
4897 ;; Avoid combining registers from different units in a single alternative,
4898 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extraction of element 0 of a V2DF into a DF destination.  Only the
;; register-to-memory alternative emits an instruction directly (movlpd,
;; with the %v prefix and "maybe_vex" attribute covering the AVX
;; encoding); the other four alternatives emit "#" and are left to a
;; splitter.  The condition rejects memory-to-memory.
4899 (define_insn "sse2_storelpd"
4900 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4901 (vec_select:DF
4902 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4903 (parallel [(const_int 0)])))]
4904 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4905 "@
4906 %vmovlpd\t{%1, %0|%0, %1}
4907 #
4908 #
4909 #
4910 #"
4911 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4912 (set_attr "prefix_data16" "1,*,*,*,*")
4913 (set_attr "prefix" "maybe_vex")
4914 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
4915
;; After reload: extracting element 0 of a V2DF into a register becomes
;; a plain DFmode move.  A register source is re-expressed as the same
;; hard register in DFmode; any other source goes through gen_lowpart.
4916 (define_split
4917 [(set (match_operand:DF 0 "register_operand" "")
4918 (vec_select:DF
4919 (match_operand:V2DF 1 "nonimmediate_operand" "")
4920 (parallel [(const_int 0)])))]
4921 "TARGET_SSE2 && reload_completed"
4922 [(const_int 0)]
4923 {
4924 rtx op1 = operands[1];
4925 if (REG_P (op1))
4926 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4927 else
4928 op1 = gen_lowpart (DFmode, op1);
4929 emit_move_insn (operands[0], op1);
4930 DONE;
4931 })
4932
;; Expander for "replace the high element": the result is element 0 of
;; operand 1 concatenated with DF operand 2.  The operands are first
;; passed through ix86_fixup_binary_operands, which returns the
;; destination to use; sse2_loadhpd is emitted into it, and the value is
;; copied to operand 0 if the two differ.
4933 (define_expand "sse2_loadhpd_exp"
4934 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4935 (vec_concat:V2DF
4936 (vec_select:DF
4937 (match_operand:V2DF 1 "nonimmediate_operand" "")
4938 (parallel [(const_int 0)]))
4939 (match_operand:DF 2 "nonimmediate_operand" "")))]
4940 "TARGET_SSE2"
4941 {
4942 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4943
4944 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4945
4946 /* Fix up the destination if needed. */
4947 if (dst != operands[0])
4948 emit_move_insn (operands[0], dst);
4949
4950 DONE;
4951 })
4952
4953 ;; Avoid combining registers from different units in a single alternative,
4954 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX form of "replace the high element": the result is element 0 of
;; operand 1 concatenated with DF operand 2.  Alternative 0 takes operand
;; 2 from memory (vmovhpd) and alternative 1 from a register (vunpcklpd).
;; The three alternatives with an offsettable memory destination tied to
;; operand 1 emit "#" and are left to a splitter.
4955 (define_insn "*avx_loadhpd"
4956 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4957 (vec_concat:V2DF
4958 (vec_select:DF
4959 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
4960 (parallel [(const_int 0)]))
4961 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4962 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4963 "@
4964 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4965 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4966 #
4967 #
4968 #"
4969 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4970 (set_attr "prefix" "vex")
4971 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4972
;; SSE2 form of "replace the high element": the result is element 0 of
;; operand 1 concatenated with DF operand 2.  Operand 1 is tied to the
;; destination in every alternative:
;;   0: operand 2 in memory, movhpd.
;;   1: operand 2 in an SSE register, unpcklpd.
;;   2-4: offsettable memory destination; emitted as "#" and left to a
;;        splitter.
;; There is deliberately no alternative with operand 2 tied to the
;; destination.  shufpd always takes the result's element 0 from the
;; destination register, which in that arrangement holds only a DF value
;; whose upper half is undefined, so no immediate can produce
;; { operand1[0], operand2 } that way.
4973 (define_insn "sse2_loadhpd"
4974 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
4975 (vec_concat:V2DF
4976 (vec_select:DF
4977 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,0,0")
4978 (parallel [(const_int 0)]))
4979 (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
4980 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4981 "@
4982 movhpd\t{%2, %0|%0, %2}
4983 unpcklpd\t{%2, %0|%0, %2}
4985 #
4986 #
4987 #"
4988 [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
4989 (set_attr "prefix_data16" "1,*,*,*,*")
4991 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4992
;; After reload: replacing the high element of a V2DF that lives in
;; memory (destination and source vector are the same memory operand)
;; becomes a single DFmode store of register operand 1 at byte offset 8.
4993 (define_split
4994 [(set (match_operand:V2DF 0 "memory_operand" "")
4995 (vec_concat:V2DF
4996 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4997 (match_operand:DF 1 "register_operand" "")))]
4998 "TARGET_SSE2 && reload_completed"
4999 [(set (match_dup 0) (match_dup 1))]
5000 "operands[0] = adjust_address (operands[0], DFmode, 8);")
5001
;; Expander for "replace the low element": the result is DF operand 2
;; concatenated with element 1 of operand 1.  The operands are first
;; passed through ix86_fixup_binary_operands, which returns the
;; destination to use; sse2_loadlpd is emitted into it, and the value is
;; copied to operand 0 if the two differ.
5002 (define_expand "sse2_loadlpd_exp"
5003 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
5004 (vec_concat:V2DF
5005 (match_operand:DF 2 "nonimmediate_operand" "")
5006 (vec_select:DF
5007 (match_operand:V2DF 1 "nonimmediate_operand" "")
5008 (parallel [(const_int 1)]))))]
5009 "TARGET_SSE2"
5010 {
5011 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
5012
5013 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
5014
5015 /* Fix up the destination if needed. */
5016 if (dst != operands[0])
5017 emit_move_insn (operands[0], dst);
5018
5019 DONE;
5020 })
5021
5022 ;; Avoid combining registers from different units in a single alternative,
5023 ;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX form of "replace the low element": the result is DF operand 2
;; concatenated with element 1 of operand 1.  Alternatives, in order:
;;   0: operand 2 in memory, operand 1 matching constraint "C", vmovsd.
;;   1: operand 2 in memory, operand 1 in a register, vmovlpd.
;;   2: both in registers, three-operand vmovsd.
;;   3: operand 1 in offsettable memory, vmovhpd of %H1.
;;   4-6: memory destination tied to operand 1; emitted as "#" and left
;;        to a splitter.
;; NOTE(review): "C" is presumably the all-zeros vector constant; confirm
;; against the i386 constraint definitions.
5024 (define_insn "*avx_loadlpd"
5025 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
5026 (vec_concat:V2DF
5027 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
5028 (vec_select:DF
5029 (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
5030 (parallel [(const_int 1)]))))]
5031 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5032 "@
5033 vmovsd\t{%2, %0|%0, %2}
5034 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5035 vmovsd\t{%2, %1, %0|%0, %1, %2}
5036 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
5037 #
5038 #
5039 #"
5040 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
5041 (set_attr "prefix" "vex")
5042 (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
5043
;; SSE2 form of "replace the low element": the result is DF operand 2
;; concatenated with element 1 of operand 1.  Alternatives, in order:
;;   0: operand 2 in memory, operand 1 matching constraint "C", movsd.
;;   1: operand 2 in memory, operand 1 tied to the destination, movlpd.
;;   2: operand 2 in a register, operand 1 tied to the destination, movsd.
;;   3: operand 2 tied to the destination, operand 1 in another register,
;;      shufpd with immediate 2.  The shufpd source must be operand 1:
;;      element 0 is kept from the destination (operand 2) and element 1
;;      is taken from the source's high half.  Naming %2 as the source
;;      would repeat the destination register and leave it unchanged.
;;   4: operand 2 tied to the destination, operand 1 in offsettable
;;      memory, movhpd of %H1.
;;   5-7: memory destination tied to operand 1; emitted as "#" and left
;;        to a splitter.
5044 (define_insn "sse2_loadlpd"
5045 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
5046 (vec_concat:V2DF
5047 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
5048 (vec_select:DF
5049 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
5050 (parallel [(const_int 1)]))))]
5051 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5052 "@
5053 movsd\t{%2, %0|%0, %2}
5054 movlpd\t{%2, %0|%0, %2}
5055 movsd\t{%2, %0|%0, %2}
5056 shufpd\t{$2, %1, %0|%0, %1, 2}
5057 movhpd\t{%H1, %0|%0, %H1}
5058 #
5059 #
5060 #"
5061 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
5062 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
5063 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
5064 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
5065
;; After reload: replacing the low element of a V2DF that lives in
;; memory (destination and source vector are the same memory operand)
;; becomes a single DFmode store of register operand 1.  The pattern
;; keeps element 1 and overwrites element 0, so the store goes to byte
;; offset 0; offset 8 would overwrite the element that must be kept.
5066 (define_split
5067 [(set (match_operand:V2DF 0 "memory_operand" "")
5068 (vec_concat:V2DF
5069 (match_operand:DF 1 "register_operand" "")
5070 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
5071 "TARGET_SSE2 && reload_completed"
5072 [(set (match_dup 0) (match_dup 1))]
5073 "operands[0] = adjust_address (operands[0], DFmode, 0);")
5074
5075 ;; Not sure these two are ever used, but it doesn't hurt to have
5076 ;; them. -aoliva
;; Extraction of element 1 of a V2DF on targets with SSE but without
;; SSE2, using single-precision move instructions: movhps store to
;; memory, movhlps between registers, and a movlps load of %H1 from
;; offsettable memory.  The condition rejects memory-to-memory.
5077 (define_insn "*vec_extractv2df_1_sse"
5078 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5079 (vec_select:DF
5080 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
5081 (parallel [(const_int 1)])))]
5082 "!TARGET_SSE2 && TARGET_SSE
5083 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5084 "@
5085 movhps\t{%1, %0|%0, %1}
5086 movhlps\t{%1, %0|%0, %1}
5087 movlps\t{%H1, %0|%0, %H1}"
5088 [(set_attr "type" "ssemov")
5089 (set_attr "mode" "V2SF,V4SF,V2SF")])
5090
;; Extraction of element 0 of a V2DF on targets with SSE but without
;; SSE2, using single-precision move instructions: movlps store to
;; memory, movaps between registers, and a movlps load from memory.  The
;; condition rejects memory-to-memory.
5091 (define_insn "*vec_extractv2df_0_sse"
5092 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
5093 (vec_select:DF
5094 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
5095 (parallel [(const_int 0)])))]
5096 "!TARGET_SSE2 && TARGET_SSE
5097 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5098 "@
5099 movlps\t{%1, %0|%0, %1}
5100 movaps\t{%1, %0|%0, %1}
5101 movlps\t{%1, %0|%0, %1}"
5102 [(set_attr "type" "ssemov")
5103 (set_attr "mode" "V2SF,V4SF,V2SF")])
5104
;; AVX vec_merge with mask 1: element 0 of the result comes from operand
;; 2 and element 1 from operand 1.  Alternatives, in order:
;;   0: both inputs in registers, vmovsd.
;;   1: operand 2 in memory, vmovlpd load.
;;   2: memory destination tied to operand 1, vmovlpd store of operand 2.
;;   3: operand 1 in offsettable memory, vmovhps load of %H1.
;;   4: offsettable memory destination tied to operand 2, vmovhps store
;;      of operand 1 to %H0.
5105 (define_insn "*avx_movsd"
5106 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
5107 (vec_merge:V2DF
5108 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
5109 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
5110 (const_int 1)))]
5111 "TARGET_AVX"
5112 "@
5113 vmovsd\t{%2, %1, %0|%0, %1, %2}
5114 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5115 vmovlpd\t{%2, %0|%0, %2}
5116 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
5117 vmovhps\t{%1, %H0|%H0, %1}"
5118 [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
5119 (set_attr "prefix" "vex")
5120 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
5121
;; SSE2 vec_merge with mask 1: element 0 of the result comes from operand
;; 2 and element 1 from operand 1.  Alternatives, in order:
;;   0: operand 1 tied to the destination, operand 2 in a register, movsd.
;;   1: operand 1 tied to the destination, operand 2 in memory, movlpd.
;;   2: memory destination tied to operand 1, movlpd store of operand 2.
;;   3: operand 2 tied to the destination, operand 1 in another register,
;;      shufpd with immediate 2.  The shufpd source must be operand 1:
;;      element 0 is kept from the destination (operand 2) and element 1
;;      is taken from the source's high half.  Naming %2 as the source
;;      would repeat the destination register and leave it unchanged.
;;   4: operand 2 tied to the destination, operand 1 in offsettable
;;      memory, movhps load of %H1.
;;   5: offsettable memory destination tied to operand 2, movhps store of
;;      operand 1 to %H0.
5122 (define_insn "sse2_movsd"
5123 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
5124 (vec_merge:V2DF
5125 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
5126 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
5127 (const_int 1)))]
5128 "TARGET_SSE2"
5129 "@
5130 movsd\t{%2, %0|%0, %2}
5131 movlpd\t{%2, %0|%0, %2}
5132 movlpd\t{%2, %0|%0, %2}
5133 shufpd\t{$2, %1, %0|%0, %1, 2}
5134 movhps\t{%H1, %0|%0, %H1}
5135 movhps\t{%1, %H0|%H0, %1}"
5136 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
5137 (set_attr "prefix_data16" "*,1,1,*,*,*")
5138 (set_attr "length_immediate" "*,*,*,1,*,*")
5139 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
5140
;; SSE3 broadcast of a DF value (register or memory) into both elements
;; of a V2DF register, emitted as movddup; the %v prefix and "maybe_vex"
;; attribute cover the AVX encoding.
5141 (define_insn "*vec_dupv2df_sse3"
5142 [(set (match_operand:V2DF 0 "register_operand" "=x")
5143 (vec_duplicate:V2DF
5144 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
5145 "TARGET_SSE3"
5146 "%vmovddup\t{%1, %0|%0, %1}"
5147 [(set_attr "type" "sselog1")
5148 (set_attr "prefix" "maybe_vex")
5149 (set_attr "mode" "DF")])
5150
;; SSE2 broadcast of a DF value into both elements of a V2DF register.
;; The source is tied to the destination register, so the insn is emitted
;; as unpcklpd %0, %0.
5151 (define_insn "vec_dupv2df"
5152 [(set (match_operand:V2DF 0 "register_operand" "=x")
5153 (vec_duplicate:V2DF
5154 (match_operand:DF 1 "register_operand" "0")))]
5155 "TARGET_SSE2"
5156 "unpcklpd\t%0, %0"
5157 [(set_attr "type" "sselog1")
5158 (set_attr "mode" "V2DF")])
5159
;; SSE3 pattern for a V2DF built by concatenating a DF operand with
;; itself.  This is the same broadcast as *vec_dupv2df_sse3 written as a
;; vec_concat, and it is likewise emitted as movddup.
5160 (define_insn "*vec_concatv2df_sse3"
5161 [(set (match_operand:V2DF 0 "register_operand" "=x")
5162 (vec_concat:V2DF
5163 (match_operand:DF 1 "nonimmediate_operand" "xm")
5164 (match_dup 1)))]
5165 "TARGET_SSE3"
5166 "%vmovddup\t{%1, %0|%0, %1}"
5167 [(set_attr "type" "sselog1")
5168 (set_attr "prefix" "maybe_vex")
5169 (set_attr "mode" "DF")])
5170
;; AVX construction of a V2DF from two DF values (operand 1 low, operand
;; 2 high).  Alternatives: both in registers (vunpcklpd), operand 2 in
;; memory (vmovhpd), and operand 1 in memory with operand 2 matching
;; constraint "C" (vmovsd load).
;; NOTE(review): "C" is presumably the zero constant; confirm against the
;; i386 constraint definitions.
5171 (define_insn "*vec_concatv2df_avx"
5172 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
5173 (vec_concat:V2DF
5174 (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
5175 (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
5176 "TARGET_AVX"
5177 "@
5178 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
5179 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5180 vmovsd\t{%1, %0|%0, %1}"
5181 [(set_attr "type" "ssemov")
5182 (set_attr "prefix" "vex")
5183 (set_attr "mode" "DF,V1DF,DF")])
5184
;; Non-AVX construction of a V2DF from two DF values (operand 1 low,
;; operand 2 high).  The first three alternatives use the "Y2" register
;; constraint and double-precision instructions (unpcklpd, movhpd,
;; movsd); the last two use plain "x" registers with single-precision
;; moves (movlhps, movhps).  The insn condition is only TARGET_SSE.
;; NOTE(review): "Y2" is presumably "SSE register when SSE2 is enabled";
;; confirm against the i386 constraint definitions.
5185 (define_insn "*vec_concatv2df"
5186 [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
5187 (vec_concat:V2DF
5188 (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
5189 (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
5190 "TARGET_SSE"
5191 "@
5192 unpcklpd\t{%2, %0|%0, %2}
5193 movhpd\t{%2, %0|%0, %2}
5194 movsd\t{%1, %0|%0, %1}
5195 movlhps\t{%2, %0|%0, %2}
5196 movhps\t{%2, %0|%0, %2}"
5197 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
5198 (set_attr "prefix_data16" "*,1,*,*,*")
5199 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
5200
5201 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5202 ;;
5203 ;; Parallel integral arithmetic
5204 ;;
5205 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5206
;; Integer vector negation for the 16-byte integral modes (SSEMODEI),
;; expanded as a subtraction from zero: operand 2 is a register holding
;; the mode's zero constant and the result is operand 2 minus operand 1.
5207 (define_expand "neg<mode>2"
5208 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5209 (minus:SSEMODEI
5210 (match_dup 2)
5211 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
5212 "TARGET_SSE2"
5213 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
5214
;; Expander for integer vector addition and subtraction (the plusminus
;; code iterator) over the 16-byte integral modes.  The operands are
;; passed through ix86_fixup_binary_operands_no_copy before the pattern
;; is emitted.
5215 (define_expand "<plusminus_insn><mode>3"
5216 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5217 (plusminus:SSEMODEI
5218 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5219 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5220 "TARGET_SSE2"
5221 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
5222
;; AVX three-operand integer vector add/subtract for the 16-byte integral
;; modes; the mnemonic is assembled from the plusminus_mnemonic and
;; ssevecsize attributes.
;; NOTE(review): <comm> presumably expands to the commutative marker "%"
;; for plus and to nothing for minus; confirm where the attribute is
;; defined.
5223 (define_insn "*avx_<plusminus_insn><mode>3"
5224 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5225 (plusminus:SSEMODEI
5226 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
5227 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5228 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5229 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5230 [(set_attr "type" "sseiadd")
5231 (set_attr "prefix" "vex")
5232 (set_attr "mode" "TI")])
5233
;; SSE2 two-operand integer vector add/subtract for the 16-byte integral
;; modes; operand 1 is tied to the destination.
;; NOTE(review): <comm> presumably expands to the commutative marker "%"
;; for plus and to nothing for minus; confirm where the attribute is
;; defined.
5234 (define_insn "*<plusminus_insn><mode>3"
5235 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5236 (plusminus:SSEMODEI
5237 (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
5238 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5239 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5240 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5241 [(set_attr "type" "sseiadd")
5242 (set_attr "prefix_data16" "1")
5243 (set_attr "mode" "TI")])
5244
;; Expander for the operations in the sat_plusminus code iterator over
;; the SSEMODE12 modes.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
;; NOTE(review): sat_plusminus is presumably the saturating add/subtract
;; codes and SSEMODE12 the byte and word vector modes; neither definition
;; is visible here.
5245 (define_expand "sse2_<plusminus_insn><mode>3"
5246 [(set (match_operand:SSEMODE12 0 "register_operand" "")
5247 (sat_plusminus:SSEMODE12
5248 (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
5249 (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
5250 "TARGET_SSE2"
5251 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
5252
;; AVX three-operand insn for the sat_plusminus operations over the
;; SSEMODE12 modes.  It shares its name template with the plusminus
;; pattern but iterates over different codes and modes.
5253 (define_insn "*avx_<plusminus_insn><mode>3"
5254 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5255 (sat_plusminus:SSEMODE12
5256 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
5257 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5258 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5259 "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5260 [(set_attr "type" "sseiadd")
5261 (set_attr "prefix" "vex")
5262 (set_attr "mode" "TI")])
5263
;; SSE2 two-operand insn for the sat_plusminus operations over the
;; SSEMODE12 modes; operand 1 is tied to the destination.
5264 (define_insn "*sse2_<plusminus_insn><mode>3"
5265 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
5266 (sat_plusminus:SSEMODE12
5267 (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
5268 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
5269 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5270 "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
5271 [(set_attr "type" "sseiadd")
5272 (set_attr "prefix_data16" "1")
5273 (set_attr "mode" "TI")])
5274
;; V16QI multiplication, for which this pattern has no direct
;; instruction: it only matches while pseudos can still be created and is
;; always split ("#" template, split condition "&& 1").  The split uses
;; six V16QI temporaries: each input is interleaved with itself (high and
;; low halves), the two pairs are multiplied as V8HI with mulv8hi3, and
;; ix86_expand_vec_extract_even_odd (final argument 0) gathers the result
;; bytes into operand 0.
5275 (define_insn_and_split "mulv16qi3"
5276 [(set (match_operand:V16QI 0 "register_operand" "")
5277 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
5278 (match_operand:V16QI 2 "register_operand" "")))]
5279 "TARGET_SSE2
5280 && can_create_pseudo_p ()"
5281 "#"
5282 "&& 1"
5283 [(const_int 0)]
5284 {
5285 rtx t[6];
5286 int i;
5287
5288 for (i = 0; i < 6; ++i)
5289 t[i] = gen_reg_rtx (V16QImode);
5290
5291 /* Unpack data such that we've got a source byte in each low byte of
5292 each word. We don't care what goes into the high byte of each word.
5293 Rather than trying to get zero in there, most convenient is to let
5294 it be a copy of the low byte. */
5295 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
5296 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
5297 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
5298 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
5299
5300 /* Multiply words. The end-of-line annotations here give a picture of what
5301 the output of that instruction looks like. Dot means don't care; the
5302 letters are the bytes of the result with A being the most significant. */
5303 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
5304 gen_lowpart (V8HImode, t[0]),
5305 gen_lowpart (V8HImode, t[1])));
5306 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
5307 gen_lowpart (V8HImode, t[2]),
5308 gen_lowpart (V8HImode, t[3])));
5309
5310 /* Extract the even bytes and merge them back together. */
5311 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
5312 DONE;
5313 })
5314
;; Expander for V8HI multiplication.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy with code MULT before the pattern
;; is emitted.
5315 (define_expand "mulv8hi3"
5316 [(set (match_operand:V8HI 0 "register_operand" "")
5317 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
5318 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5319 "TARGET_SSE2"
5320 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
5321
;; AVX three-operand V8HI multiplication, emitted as vpmullw.  The "%" on
;; operand 1 marks operands 1 and 2 as commutative.
5322 (define_insn "*avx_mulv8hi3"
5323 [(set (match_operand:V8HI 0 "register_operand" "=x")
5324 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5325 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5326 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5327 "vpmullw\t{%2, %1, %0|%0, %1, %2}"
5328 [(set_attr "type" "sseimul")
5329 (set_attr "prefix" "vex")
5330 (set_attr "mode" "TI")])
5331
;; SSE2 two-operand V8HI multiplication, emitted as pmullw.  Operand 1 is
;; tied to the destination and marked commutative with operand 2 ("%0").
5332 (define_insn "*mulv8hi3"
5333 [(set (match_operand:V8HI 0 "register_operand" "=x")
5334 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5335 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
5336 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5337 "pmullw\t{%2, %0|%0, %2}"
5338 [(set_attr "type" "sseimul")
5339 (set_attr "prefix_data16" "1")
5340 (set_attr "mode" "TI")])
5341
;; Expander for the V8HI high-part multiply: both inputs are extended to
;; V8SI (sign or zero, per the any_extend iterator), multiplied, shifted
;; right logically by 16 and truncated back to V8HI.  The operands are
;; passed through ix86_fixup_binary_operands_no_copy first.
5342 (define_expand "<s>mulv8hi3_highpart"
5343 [(set (match_operand:V8HI 0 "register_operand" "")
5344 (truncate:V8HI
5345 (lshiftrt:V8SI
5346 (mult:V8SI
5347 (any_extend:V8SI
5348 (match_operand:V8HI 1 "nonimmediate_operand" ""))
5349 (any_extend:V8SI
5350 (match_operand:V8HI 2 "nonimmediate_operand" "")))
5351 (const_int 16))))]
5352 "TARGET_SSE2"
5353 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
5354
;; AVX three-operand V8HI high-part multiply (extend to V8SI, multiply,
;; shift right by 16, truncate).  Emitted as vpmulh<u>w, where the <u>
;; attribute selects the variant matching the extension code.
5355 (define_insn "*avx_<s>mulv8hi3_highpart"
5356 [(set (match_operand:V8HI 0 "register_operand" "=x")
5357 (truncate:V8HI
5358 (lshiftrt:V8SI
5359 (mult:V8SI
5360 (any_extend:V8SI
5361 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
5362 (any_extend:V8SI
5363 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5364 (const_int 16))))]
5365 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5366 "vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5367 [(set_attr "type" "sseimul")
5368 (set_attr "prefix" "vex")
5369 (set_attr "mode" "TI")])
5370
;; SSE2 two-operand V8HI high-part multiply (extend to V8SI, multiply,
;; shift right by 16, truncate).  Operand 1 is tied to the destination
;; and commutative with operand 2; emitted as pmulh<u>w.
5371 (define_insn "*<s>mulv8hi3_highpart"
5372 [(set (match_operand:V8HI 0 "register_operand" "=x")
5373 (truncate:V8HI
5374 (lshiftrt:V8SI
5375 (mult:V8SI
5376 (any_extend:V8SI
5377 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
5378 (any_extend:V8SI
5379 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
5380 (const_int 16))))]
5381 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5382 "pmulh<u>w\t{%2, %0|%0, %2}"
5383 [(set_attr "type" "sseimul")
5384 (set_attr "prefix_data16" "1")
5385 (set_attr "mode" "TI")])
5386
;; Expander for the unsigned widening multiply V4SI x V4SI -> V2DI.
;; Elements 0 and 2 of each V4SI input are selected, zero-extended to
;; V2DI and multiplied.  The preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
5387 (define_expand "sse2_umulv2siv2di3"
5388 [(set (match_operand:V2DI 0 "register_operand" "")
5389 (mult:V2DI
5390 (zero_extend:V2DI
5391 (vec_select:V2SI
5392 (match_operand:V4SI 1 "nonimmediate_operand" "")
5393 (parallel [(const_int 0) (const_int 2)])))
5394 (zero_extend:V2DI
5395 (vec_select:V2SI
5396 (match_operand:V4SI 2 "nonimmediate_operand" "")
5397 (parallel [(const_int 0) (const_int 2)])))))]
5398 "TARGET_SSE2"
5399 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
5400
;; AVX form of the unsigned widening multiply of V4SI elements 0 and 2:
;; three-operand vpmuludq.  Operands 1 and 2 are commutative ("%").
5401 (define_insn "*avx_umulv2siv2di3"
5402 [(set (match_operand:V2DI 0 "register_operand" "=x")
5403 (mult:V2DI
5404 (zero_extend:V2DI
5405 (vec_select:V2SI
5406 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5407 (parallel [(const_int 0) (const_int 2)])))
5408 (zero_extend:V2DI
5409 (vec_select:V2SI
5410 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5411 (parallel [(const_int 0) (const_int 2)])))))]
5412 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5413 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5414 [(set_attr "type" "sseimul")
5415 (set_attr "prefix" "vex")
5416 (set_attr "mode" "TI")])
5417
;; SSE2 (non-VEX) form of the unsigned widening multiply of V4SI
;; elements 0 and 2: two-operand pmuludq with operand 1 tied to the
;; destination ("0") and commutative with operand 2 ("%").
5418 (define_insn "*sse2_umulv2siv2di3"
5419 [(set (match_operand:V2DI 0 "register_operand" "=x")
5420 (mult:V2DI
5421 (zero_extend:V2DI
5422 (vec_select:V2SI
5423 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5424 (parallel [(const_int 0) (const_int 2)])))
5425 (zero_extend:V2DI
5426 (vec_select:V2SI
5427 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5428 (parallel [(const_int 0) (const_int 2)])))))]
5429 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5430 "pmuludq\t{%2, %0|%0, %2}"
5431 [(set_attr "type" "sseimul")
5432 (set_attr "prefix_data16" "1")
5433 (set_attr "mode" "TI")])
5434
;; Expander for the signed widening multiply V4SI x V4SI -> V2DI
;; (requires SSE4.1).  Elements 0 and 2 of each V4SI input are selected,
;; sign-extended to V2DI and multiplied.  The preparation statement hands
;; the operands to ix86_fixup_binary_operands_no_copy before the pattern
;; is emitted.
5435 (define_expand "sse4_1_mulv2siv2di3"
5436 [(set (match_operand:V2DI 0 "register_operand" "")
5437 (mult:V2DI
5438 (sign_extend:V2DI
5439 (vec_select:V2SI
5440 (match_operand:V4SI 1 "nonimmediate_operand" "")
5441 (parallel [(const_int 0) (const_int 2)])))
5442 (sign_extend:V2DI
5443 (vec_select:V2SI
5444 (match_operand:V4SI 2 "nonimmediate_operand" "")
5445 (parallel [(const_int 0) (const_int 2)])))))]
5446 "TARGET_SSE4_1"
5447 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
5448
;; AVX form of the signed widening multiply of V4SI elements 0 and 2:
;; three-operand vpmuldq.  Operands 1 and 2 are commutative ("%").
5449 (define_insn "*avx_mulv2siv2di3"
5450 [(set (match_operand:V2DI 0 "register_operand" "=x")
5451 (mult:V2DI
5452 (sign_extend:V2DI
5453 (vec_select:V2SI
5454 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5455 (parallel [(const_int 0) (const_int 2)])))
5456 (sign_extend:V2DI
5457 (vec_select:V2SI
5458 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5459 (parallel [(const_int 0) (const_int 2)])))))]
5460 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5461 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5462 [(set_attr "type" "sseimul")
5463 (set_attr "prefix_extra" "1")
5464 (set_attr "prefix" "vex")
5465 (set_attr "mode" "TI")])
5466
;; SSE4.1 (non-VEX) form of the signed widening multiply of V4SI
;; elements 0 and 2: two-operand pmuldq with operand 1 tied to the
;; destination ("0") and commutative with operand 2 ("%").
5467 (define_insn "*sse4_1_mulv2siv2di3"
5468 [(set (match_operand:V2DI 0 "register_operand" "=x")
5469 (mult:V2DI
5470 (sign_extend:V2DI
5471 (vec_select:V2SI
5472 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5473 (parallel [(const_int 0) (const_int 2)])))
5474 (sign_extend:V2DI
5475 (vec_select:V2SI
5476 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
5477 (parallel [(const_int 0) (const_int 2)])))))]
5478 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5479 "pmuldq\t{%2, %0|%0, %2}"
5480 [(set_attr "type" "sseimul")
5481 (set_attr "prefix_extra" "1")
5482 (set_attr "mode" "TI")])
5483
;; Expander for the V8HI multiply-and-add-pairs operation producing V4SI.
;; The even-indexed elements (0, 2, 4, 6) of both inputs are sign-extended
;; and multiplied, likewise the odd-indexed elements (1, 3, 5, 7), and the
;; two V4SI product vectors are added.  The preparation statement hands
;; the operands to ix86_fixup_binary_operands_no_copy before the pattern
;; is emitted.
5484 (define_expand "sse2_pmaddwd"
5485 [(set (match_operand:V4SI 0 "register_operand" "")
5486 (plus:V4SI
5487 (mult:V4SI
5488 (sign_extend:V4SI
5489 (vec_select:V4HI
5490 (match_operand:V8HI 1 "nonimmediate_operand" "")
5491 (parallel [(const_int 0)
5492 (const_int 2)
5493 (const_int 4)
5494 (const_int 6)])))
5495 (sign_extend:V4SI
5496 (vec_select:V4HI
5497 (match_operand:V8HI 2 "nonimmediate_operand" "")
5498 (parallel [(const_int 0)
5499 (const_int 2)
5500 (const_int 4)
5501 (const_int 6)]))))
5502 (mult:V4SI
5503 (sign_extend:V4SI
5504 (vec_select:V4HI (match_dup 1)
5505 (parallel [(const_int 1)
5506 (const_int 3)
5507 (const_int 5)
5508 (const_int 7)])))
5509 (sign_extend:V4SI
5510 (vec_select:V4HI (match_dup 2)
5511 (parallel [(const_int 1)
5512 (const_int 3)
5513 (const_int 5)
5514 (const_int 7)]))))))]
5515 "TARGET_SSE2"
5516 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
5517
;; AVX form of the V8HI multiply-and-add-pairs operation: three-operand
;; vpmaddwd.  The RTL sums the sign-extended products of the even-indexed
;; elements with those of the odd-indexed elements.  Operands 1 and 2 are
;; commutative ("%").
5518 (define_insn "*avx_pmaddwd"
5519 [(set (match_operand:V4SI 0 "register_operand" "=x")
5520 (plus:V4SI
5521 (mult:V4SI
5522 (sign_extend:V4SI
5523 (vec_select:V4HI
5524 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
5525 (parallel [(const_int 0)
5526 (const_int 2)
5527 (const_int 4)
5528 (const_int 6)])))
5529 (sign_extend:V4SI
5530 (vec_select:V4HI
5531 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5532 (parallel [(const_int 0)
5533 (const_int 2)
5534 (const_int 4)
5535 (const_int 6)]))))
5536 (mult:V4SI
5537 (sign_extend:V4SI
5538 (vec_select:V4HI (match_dup 1)
5539 (parallel [(const_int 1)
5540 (const_int 3)
5541 (const_int 5)
5542 (const_int 7)])))
5543 (sign_extend:V4SI
5544 (vec_select:V4HI (match_dup 2)
5545 (parallel [(const_int 1)
5546 (const_int 3)
5547 (const_int 5)
5548 (const_int 7)]))))))]
5549 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5550 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5551 [(set_attr "type" "sseiadd")
5552 (set_attr "prefix" "vex")
5553 (set_attr "mode" "TI")])
5554
;; SSE2 (non-VEX) form of the V8HI multiply-and-add-pairs operation:
;; two-operand pmaddwd with operand 1 tied to the destination ("0") and
;; commutative with operand 2 ("%").  Unlike the AVX pattern it also sets
;; the "atom_unit" attribute to "simul".
5555 (define_insn "*sse2_pmaddwd"
5556 [(set (match_operand:V4SI 0 "register_operand" "=x")
5557 (plus:V4SI
5558 (mult:V4SI
5559 (sign_extend:V4SI
5560 (vec_select:V4HI
5561 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
5562 (parallel [(const_int 0)
5563 (const_int 2)
5564 (const_int 4)
5565 (const_int 6)])))
5566 (sign_extend:V4SI
5567 (vec_select:V4HI
5568 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
5569 (parallel [(const_int 0)
5570 (const_int 2)
5571 (const_int 4)
5572 (const_int 6)]))))
5573 (mult:V4SI
5574 (sign_extend:V4SI
5575 (vec_select:V4HI (match_dup 1)
5576 (parallel [(const_int 1)
5577 (const_int 3)
5578 (const_int 5)
5579 (const_int 7)])))
5580 (sign_extend:V4SI
5581 (vec_select:V4HI (match_dup 2)
5582 (parallel [(const_int 1)
5583 (const_int 3)
5584 (const_int 5)
5585 (const_int 7)]))))))]
5586 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5587 "pmaddwd\t{%2, %0|%0, %2}"
5588 [(set_attr "type" "sseiadd")
5589 (set_attr "atom_unit" "simul")
5590 (set_attr "prefix_data16" "1")
5591 (set_attr "mode" "TI")])
5592
;; Expander for the V4SI multiply, available from SSE2 on.  The operands
;; are passed through ix86_fixup_binary_operands_no_copy only when
;; SSE4.1 or AVX is enabled; otherwise the pattern is emitted with the
;; operands unchanged.
5593 (define_expand "mulv4si3"
5594 [(set (match_operand:V4SI 0 "register_operand" "")
5595 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5596 (match_operand:V4SI 2 "register_operand" "")))]
5597 "TARGET_SSE2"
5598 {
5599 if (TARGET_SSE4_1 || TARGET_AVX)
5600 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
5601 })
5602
;; AVX form of the V4SI multiply: three-operand vpmulld.  Operands 1 and
;; 2 are commutative ("%"); operand 2 may be a register or memory.
5603 (define_insn "*avx_mulv4si3"
5604 [(set (match_operand:V4SI 0 "register_operand" "=x")
5605 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5606 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5607 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5608 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5609 [(set_attr "type" "sseimul")
5610 (set_attr "prefix_extra" "1")
5611 (set_attr "prefix" "vex")
5612 (set_attr "mode" "TI")])
5613
;; SSE4.1 (non-VEX) form of the V4SI multiply: two-operand pmulld with
;; operand 1 tied to the destination ("0") and commutative with operand 2
;; ("%").
5614 (define_insn "*sse4_1_mulv4si3"
5615 [(set (match_operand:V4SI 0 "register_operand" "=x")
5616 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
5617 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5618 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5619 "pmulld\t{%2, %0|%0, %2}"
5620 [(set_attr "type" "sseimul")
5621 (set_attr "prefix_extra" "1")
5622 (set_attr "mode" "TI")])
5623
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor AVX.
;; The insn only matches while new pseudos can still be created, outputs
;; "#" and is always split ("&& 1") into the sequence below: two
;; sse2_umulv2siv2di3 multiplies (elements 0/2, then elements 1/3 after a
;; 32-bit whole-register right shift of both inputs), two pshufd
;; shuffles and a final low interleave that writes the destination.
5624 (define_insn_and_split "*sse2_mulv4si3"
5625 [(set (match_operand:V4SI 0 "register_operand" "")
5626 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5627 (match_operand:V4SI 2 "register_operand" "")))]
5628 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5629 && can_create_pseudo_p ()"
5630 "#"
5631 "&& 1"
5632 [(const_int 0)]
5633 {
5634 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5635 rtx op0, op1, op2;
5636
5637 op0 = operands[0];
5638 op1 = operands[1];
5639 op2 = operands[2];
5640 t1 = gen_reg_rtx (V4SImode);
5641 t2 = gen_reg_rtx (V4SImode);
5642 t3 = gen_reg_rtx (V4SImode);
5643 t4 = gen_reg_rtx (V4SImode);
5644 t5 = gen_reg_rtx (V4SImode);
5645 t6 = gen_reg_rtx (V4SImode);
5646 thirtytwo = GEN_INT (32);
5647
5648 /* Multiply elements 2 and 0. */
5649 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5650 op1, op2));
5651
5652 /* Shift both input vectors down one element, so that elements 3
5653 and 1 are now in the slots for elements 2 and 0. For K8, at
5654 least, this is faster than using a shuffle. */
5655 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5656 gen_lowpart (V1TImode, op1),
5657 thirtytwo));
5658 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5659 gen_lowpart (V1TImode, op2),
5660 thirtytwo));
5661 /* Multiply elements 3 and 1. */
5662 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5663 t2, t3));
5664
5665 /* Move the results in element 2 down to element 1; we don't care
5666 what goes in elements 2 and 3. */
5667 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5668 const0_rtx, const0_rtx));
5669 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5670 const0_rtx, const0_rtx));
5671
5672 /* Merge the parts back together. */
5673 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
5674 DONE;
5675 })
5676
;; V2DI multiply for SSE2.  The insn only matches while new pseudos can
;; still be created, outputs "#" and is always split ("&& 1").  With XOP
;; the split uses pshufd, mulv4si3, xop_phadddq, a 32-bit left shift and
;; xop_pmacsdql; otherwise it combines three sse2_umulv2siv2di3 partial
;; products (low x low and the two high x low cross products) with shifts
;; and adds.
5677 (define_insn_and_split "mulv2di3"
5678 [(set (match_operand:V2DI 0 "register_operand" "")
5679 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5680 (match_operand:V2DI 2 "register_operand" "")))]
5681 "TARGET_SSE2
5682 && can_create_pseudo_p ()"
5683 "#"
5684 "&& 1"
5685 [(const_int 0)]
5686 {
5687 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5688 rtx op0, op1, op2;
5689
5690 op0 = operands[0];
5691 op1 = operands[1];
5692 op2 = operands[2];
5693
5694 if (TARGET_XOP)
5695 {
5696 /* op1: A,B,C,D, op2: E,F,G,H.  Only t1-t4 are used on this path;
   t5, t6 and thirtytwo are left unset.  */
5697 op1 = gen_lowpart (V4SImode, op1);
5698 op2 = gen_lowpart (V4SImode, op2);
5699
5700 t1 = gen_reg_rtx (V4SImode);
5701 t2 = gen_reg_rtx (V4SImode);
5702 t3 = gen_reg_rtx (V2DImode);
5703 t4 = gen_reg_rtx (V2DImode);
5704
5705 /* t1: B,A,D,C */
5706 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5707 GEN_INT (1),
5708 GEN_INT (0),
5709 GEN_INT (3),
5710 GEN_INT (2)));
5711
5712 /* t2: (B*E),(A*F),(D*G),(C*H) */
5713 emit_insn (gen_mulv4si3 (t2, t1, op2));
5714
5715 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5716 emit_insn (gen_xop_phadddq (t3, t2));
5717
5718 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5719 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5720
5721 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5722 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5723 }
5724 else
5725 {
5726 t1 = gen_reg_rtx (V2DImode);
5727 t2 = gen_reg_rtx (V2DImode);
5728 t3 = gen_reg_rtx (V2DImode);
5729 t4 = gen_reg_rtx (V2DImode);
5730 t5 = gen_reg_rtx (V2DImode);
5731 t6 = gen_reg_rtx (V2DImode);
5732 thirtytwo = GEN_INT (32);
5733
5734 /* Multiply low parts. */
5735 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5736 gen_lowpart (V4SImode, op2)));
5737
5738 /* Shift input vectors right 32 bits so we can multiply high parts. */
5739 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5740 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5741
5742 /* Multiply high parts by low parts. */
5743 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5744 gen_lowpart (V4SImode, t3)));
5745 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5746 gen_lowpart (V4SImode, t2)));
5747
5748 /* Shift them back. */
5749 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5750 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5751
5752 /* Add the three parts together. */
5753 emit_insn (gen_addv2di3 (t6, t1, t4));
5754 emit_insn (gen_addv2di3 (op0, t6, t5));
5755 }
5756 DONE;
5757 })
5758
;; Signed widening multiply V8HI x V8HI -> V4SI, "hi" variant.
;; The low 16 bits (mulv8hi3) and the high 16 bits (smulv8hi3_highpart)
;; of every product are computed separately and then combined with
;; vec_interleave_highv8hi into the destination.
(define_expand "vec_widen_smult_hi_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo = gen_reg_rtx (V8HImode);
  rtx prod_hi = gen_reg_rtx (V8HImode);
  /* The interleave writes V8HI, so view the V4SI destination as V8HI.  */
  rtx dest_v8hi = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (prod_lo, operands[1], operands[2]));
  emit_insn (gen_smulv8hi3_highpart (prod_hi, operands[1], operands[2]));
  emit_insn (gen_vec_interleave_highv8hi (dest_v8hi, prod_lo, prod_hi));
  DONE;
})
5778
;; Signed widening multiply V8HI x V8HI -> V4SI, "lo" variant.
;; The low 16 bits (mulv8hi3) and the high 16 bits (smulv8hi3_highpart)
;; of every product are computed separately and then combined with
;; vec_interleave_lowv8hi into the destination.
(define_expand "vec_widen_smult_lo_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo = gen_reg_rtx (V8HImode);
  rtx prod_hi = gen_reg_rtx (V8HImode);
  /* The interleave writes V8HI, so view the V4SI destination as V8HI.  */
  rtx dest_v8hi = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (prod_lo, operands[1], operands[2]));
  emit_insn (gen_smulv8hi3_highpart (prod_hi, operands[1], operands[2]));
  emit_insn (gen_vec_interleave_lowv8hi (dest_v8hi, prod_lo, prod_hi));
  DONE;
})
5798
;; Unsigned widening multiply V8HI x V8HI -> V4SI, "hi" variant.
;; The low 16 bits (mulv8hi3) and the high 16 bits (umulv8hi3_highpart)
;; of every product are computed separately and then combined with
;; vec_interleave_highv8hi into the destination.
(define_expand "vec_widen_umult_hi_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo = gen_reg_rtx (V8HImode);
  rtx prod_hi = gen_reg_rtx (V8HImode);
  /* The interleave writes V8HI, so view the V4SI destination as V8HI.  */
  rtx dest_v8hi = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (prod_lo, operands[1], operands[2]));
  emit_insn (gen_umulv8hi3_highpart (prod_hi, operands[1], operands[2]));
  emit_insn (gen_vec_interleave_highv8hi (dest_v8hi, prod_lo, prod_hi));
  DONE;
})
5818
;; Unsigned widening multiply V8HI x V8HI -> V4SI, "lo" variant.
;; The low 16 bits (mulv8hi3) and the high 16 bits (umulv8hi3_highpart)
;; of every product are computed separately and then combined with
;; vec_interleave_lowv8hi into the destination.
(define_expand "vec_widen_umult_lo_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo = gen_reg_rtx (V8HImode);
  rtx prod_hi = gen_reg_rtx (V8HImode);
  /* The interleave writes V8HI, so view the V4SI destination as V8HI.  */
  rtx dest_v8hi = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (prod_lo, operands[1], operands[2]));
  emit_insn (gen_umulv8hi3_highpart (prod_hi, operands[1], operands[2]));
  emit_insn (gen_vec_interleave_lowv8hi (dest_v8hi, prod_lo, prod_hi));
  DONE;
})
5838
;; Signed widening multiply V4SI x V4SI -> V2DI, "hi" variant; XOP only.
;; Both inputs are permuted with pshufd using the selection {0, 2, 1, 3}
;; and the permuted vectors are fed to xop_mulv2div2di3_high.
(define_expand "vec_widen_smult_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx perm1 = gen_reg_rtx (V4SImode);
  rtx perm2 = gen_reg_rtx (V4SImode);

  /* Same element selection for both multiplicands.  */
  emit_insn (gen_sse2_pshufd_1 (perm1, operands[1],
                                GEN_INT (0), GEN_INT (2),
                                GEN_INT (1), GEN_INT (3)));
  emit_insn (gen_sse2_pshufd_1 (perm2, operands[2],
                                GEN_INT (0), GEN_INT (2),
                                GEN_INT (1), GEN_INT (3)));

  emit_insn (gen_xop_mulv2div2di3_high (operands[0], perm1, perm2));
  DONE;
})
5863
;; Signed widening multiply V4SI x V4SI -> V2DI, "lo" variant; XOP only.
;; Both inputs are permuted with pshufd using the selection {0, 2, 1, 3}
;; and the permuted vectors are fed to xop_mulv2div2di3_low.
(define_expand "vec_widen_smult_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx perm1 = gen_reg_rtx (V4SImode);
  rtx perm2 = gen_reg_rtx (V4SImode);

  /* Same element selection for both multiplicands.  */
  emit_insn (gen_sse2_pshufd_1 (perm1, operands[1],
                                GEN_INT (0), GEN_INT (2),
                                GEN_INT (1), GEN_INT (3)));
  emit_insn (gen_sse2_pshufd_1 (perm2, operands[2],
                                GEN_INT (0), GEN_INT (2),
                                GEN_INT (1), GEN_INT (3)));

  emit_insn (gen_xop_mulv2div2di3_low (operands[0], perm1, perm2));
  DONE;
})
5888
;; Unsigned widening multiply V4SI x V4SI -> V2DI, "hi" variant.
;; Each input is high-interleaved with itself, then the two results are
;; multiplied with sse2_umulv2siv2di3.
(define_expand "vec_widen_umult_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx dup1 = gen_reg_rtx (V4SImode);
  rtx dup2 = gen_reg_rtx (V4SImode);

  /* Interleave every input with itself (high half).  */
  emit_insn (gen_vec_interleave_highv4si (dup1, operands[1], operands[1]));
  emit_insn (gen_vec_interleave_highv4si (dup2, operands[2], operands[2]));

  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], dup1, dup2));
  DONE;
})
5907
;; Unsigned widening multiply V4SI x V4SI -> V2DI, "lo" variant.
;; Each input is low-interleaved with itself, then the two results are
;; multiplied with sse2_umulv2siv2di3.
(define_expand "vec_widen_umult_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx dup1 = gen_reg_rtx (V4SImode);
  rtx dup2 = gen_reg_rtx (V4SImode);

  /* Interleave every input with itself (low half).  */
  emit_insn (gen_vec_interleave_lowv4si (dup1, operands[1], operands[1]));
  emit_insn (gen_vec_interleave_lowv4si (dup2, operands[2], operands[2]));

  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], dup1, dup2));
  DONE;
})
5926
;; Signed dot product of two V8HI vectors accumulated into V4SI:
;; operand 0 = operand 3 + pmaddwd (operand 1, operand 2).
(define_expand "sdot_prodv8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")
   (match_operand:V4SI 3 "register_operand" "")]
  "TARGET_SSE2"
{
  /* Receives the pmaddwd result before it is added to the accumulator.  */
  rtx pair_sums = gen_reg_rtx (V4SImode);

  emit_insn (gen_sse2_pmaddwd (pair_sums, operands[1], operands[2]));
  emit_insn (gen_addv4si3 (operands[0], operands[3], pair_sums));
  DONE;
})
5939
;; Unsigned dot product of two V4SI vectors accumulated into V2DI.
;; Elements 0 and 2 are multiplied directly and added to operand 3; both
;; inputs are then shifted right by 32 bits as whole V1TI values,
;; multiplied again, and that second product vector is added to form
;; operand 0.
(define_expand "udot_prodv4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")
   (match_operand:V2DI 3 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx acc, shifted1, shifted2, prod2;
  rtx shift_bits = GEN_INT (32);

  /* First product vector, folded into the incoming accumulator.  */
  acc = gen_reg_rtx (V2DImode);
  emit_insn (gen_sse2_umulv2siv2di3 (acc, operands[1], operands[2]));
  emit_insn (gen_addv2di3 (acc, acc, operands[3]));

  /* Shift both inputs right by 32 bits, treating each as one V1TI.  */
  shifted1 = gen_reg_rtx (V4SImode);
  shifted2 = gen_reg_rtx (V4SImode);
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, shifted1),
                                 gen_lowpart (V1TImode, operands[1]),
                                 shift_bits));
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, shifted2),
                                 gen_lowpart (V1TImode, operands[2]),
                                 shift_bits));

  /* Second product vector, from the shifted inputs.  */
  prod2 = gen_reg_rtx (V2DImode);
  emit_insn (gen_sse2_umulv2siv2di3 (prod2, shifted1, shifted2));

  emit_insn (gen_addv2di3 (operands[0], acc, prod2));
  DONE;
})
5968
;; AVX arithmetic right shift for the SSEMODE24 modes: three-operand
;; vpsra<ssevecsize>.  The SImode count (operand 2) is either an xmm
;; register or a constant accepted by the "N" constraint;
;; length_immediate is 1 when the count is a const_int and 0 otherwise.
5969 (define_insn "*avx_ashr<mode>3"
5970 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5971 (ashiftrt:SSEMODE24
5972 (match_operand:SSEMODE24 1 "register_operand" "x")
5973 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5974 "TARGET_AVX"
5975 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5976 [(set_attr "type" "sseishft")
5977 (set_attr "prefix" "vex")
5978 (set (attr "length_immediate")
5979 (if_then_else (match_operand 2 "const_int_operand" "")
5980 (const_string "1")
5981 (const_string "0")))
5982 (set_attr "mode" "TI")])
5983
;; SSE2 (non-VEX) arithmetic right shift for the SSEMODE24 modes:
;; two-operand psra<ssevecsize> with operand 1 tied to the destination.
;; The SImode count (operand 2) is either an xmm register or a constant
;; accepted by the "N" constraint; length_immediate is 1 when the count
;; is a const_int and 0 otherwise.
5984 (define_insn "ashr<mode>3"
5985 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5986 (ashiftrt:SSEMODE24
5987 (match_operand:SSEMODE24 1 "register_operand" "0")
5988 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5989 "TARGET_SSE2"
5990 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5991 [(set_attr "type" "sseishft")
5992 (set_attr "prefix_data16" "1")
5993 (set (attr "length_immediate")
5994 (if_then_else (match_operand 2 "const_int_operand" "")
5995 (const_string "1")
5996 (const_string "0")))
5997 (set_attr "mode" "TI")])
5998
;; AVX logical right shift of a whole V1TI value: vpsrldq.
;; Operand 2 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the instruction's immediate.
5999 (define_insn "*avx_lshrv1ti3"
6000 [(set (match_operand:V1TI 0 "register_operand" "=x")
6001 (lshiftrt:V1TI
6002 (match_operand:V1TI 1 "register_operand" "x")
6003 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6004 "TARGET_AVX"
6005 {
6006 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6007 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
6008 }
6009 [(set_attr "type" "sseishft")
6010 (set_attr "prefix" "vex")
6011 (set_attr "length_immediate" "1")
6012 (set_attr "mode" "TI")])
6013
;; AVX logical right shift for the SSEMODE248 modes: three-operand
;; vpsrl<ssevecsize>.  The SImode count (operand 2) is either an xmm
;; register or a constant accepted by the "N" constraint;
;; length_immediate is 1 when the count is a const_int and 0 otherwise.
6014 (define_insn "*avx_lshr<mode>3"
6015 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6016 (lshiftrt:SSEMODE248
6017 (match_operand:SSEMODE248 1 "register_operand" "x")
6018 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6019 "TARGET_AVX"
6020 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6021 [(set_attr "type" "sseishft")
6022 (set_attr "prefix" "vex")
6023 (set (attr "length_immediate")
6024 (if_then_else (match_operand 2 "const_int_operand" "")
6025 (const_string "1")
6026 (const_string "0")))
6027 (set_attr "mode" "TI")])
6028
;; SSE2 (non-VEX) logical right shift of a whole V1TI value: two-operand
;; psrldq with operand 1 tied to the destination.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing it as the instruction's immediate.
6029 (define_insn "sse2_lshrv1ti3"
6030 [(set (match_operand:V1TI 0 "register_operand" "=x")
6031 (lshiftrt:V1TI
6032 (match_operand:V1TI 1 "register_operand" "0")
6033 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6034 "TARGET_SSE2"
6035 {
6036 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6037 return "psrldq\t{%2, %0|%0, %2}";
6038 }
6039 [(set_attr "type" "sseishft")
6040 (set_attr "prefix_data16" "1")
6041 (set_attr "length_immediate" "1")
6042 (set_attr "atom_unit" "sishuf")
6043 (set_attr "mode" "TI")])
6044
;; SSE2 (non-VEX) logical right shift for the SSEMODE248 modes:
;; two-operand psrl<ssevecsize> with operand 1 tied to the destination.
;; The SImode count (operand 2) is either an xmm register or a constant
;; accepted by the "N" constraint; length_immediate is 1 when the count
;; is a const_int and 0 otherwise.
6045 (define_insn "lshr<mode>3"
6046 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6047 (lshiftrt:SSEMODE248
6048 (match_operand:SSEMODE248 1 "register_operand" "0")
6049 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6050 "TARGET_SSE2"
6051 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
6052 [(set_attr "type" "sseishft")
6053 (set_attr "prefix_data16" "1")
6054 (set (attr "length_immediate")
6055 (if_then_else (match_operand 2 "const_int_operand" "")
6056 (const_string "1")
6057 (const_string "0")))
6058 (set_attr "mode" "TI")])
6059
;; AVX left shift of a whole V1TI value: vpslldq.
;; Operand 2 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the instruction's immediate.
6060 (define_insn "*avx_ashlv1ti3"
6061 [(set (match_operand:V1TI 0 "register_operand" "=x")
6062 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "x")
6063 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6064 "TARGET_AVX"
6065 {
6066 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6067 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
6068 }
6069 [(set_attr "type" "sseishft")
6070 (set_attr "prefix" "vex")
6071 (set_attr "length_immediate" "1")
6072 (set_attr "mode" "TI")])
6073
;; AVX left shift for the SSEMODE248 modes: three-operand
;; vpsll<ssevecsize>.  The SImode count (operand 2) is either an xmm
;; register or a constant accepted by the "N" constraint;
;; length_immediate is 1 when the count is a const_int and 0 otherwise.
6074 (define_insn "*avx_ashl<mode>3"
6075 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6076 (ashift:SSEMODE248
6077 (match_operand:SSEMODE248 1 "register_operand" "x")
6078 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6079 "TARGET_AVX"
6080 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6081 [(set_attr "type" "sseishft")
6082 (set_attr "prefix" "vex")
6083 (set (attr "length_immediate")
6084 (if_then_else (match_operand 2 "const_int_operand" "")
6085 (const_string "1")
6086 (const_string "0")))
6087 (set_attr "mode" "TI")])
6088
;; SSE2 (non-VEX) left shift of a whole V1TI value: two-operand pslldq
;; with operand 1 tied to the destination.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing it as the instruction's immediate.
;; NOTE(review): unlike sse2_lshrv1ti3 this pattern sets no "atom_unit"
;; attribute -- confirm whether that difference is intentional.
6089 (define_insn "sse2_ashlv1ti3"
6090 [(set (match_operand:V1TI 0 "register_operand" "=x")
6091 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "0")
6092 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
6093 "TARGET_SSE2"
6094 {
6095 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
6096 return "pslldq\t{%2, %0|%0, %2}";
6097 }
6098 [(set_attr "type" "sseishft")
6099 (set_attr "prefix_data16" "1")
6100 (set_attr "length_immediate" "1")
6101 (set_attr "mode" "TI")])
6102
;; SSE2 (non-VEX) left shift for the SSEMODE248 modes: two-operand
;; psll<ssevecsize> with operand 1 tied to the destination.  The SImode
;; count (operand 2) is either an xmm register or a constant accepted by
;; the "N" constraint; length_immediate is 1 when the count is a
;; const_int and 0 otherwise.
6103 (define_insn "ashl<mode>3"
6104 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
6105 (ashift:SSEMODE248
6106 (match_operand:SSEMODE248 1 "register_operand" "0")
6107 (match_operand:SI 2 "nonmemory_operand" "xN")))]
6108 "TARGET_SSE2"
6109 "psll<ssevecsize>\t{%2, %0|%0, %2}"
6110 [(set_attr "type" "sseishft")
6111 (set_attr "prefix_data16" "1")
6112 (set (attr "length_immediate")
6113 (if_then_else (match_operand 2 "const_int_operand" "")
6114 (const_string "1")
6115 (const_string "0")))
6116 (set_attr "mode" "TI")])
6117
;; Whole-vector left shift for the SSEMODEI integer modes.  The template
;; shifts in V1TImode, so the preparation code replaces operands 0 and 1
;; with their V1TImode lowparts before the pattern is emitted.  Operand 2
;; must satisfy const_0_to_255_mul_8_operand.
6118 (define_expand "vec_shl_<mode>"
6119 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6120 (ashift:V1TI
6121 (match_operand:SSEMODEI 1 "register_operand" "")
6122 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6123 "TARGET_SSE2"
6124 {
6125 operands[0] = gen_lowpart (V1TImode, operands[0]);
6126 operands[1] = gen_lowpart (V1TImode, operands[1]);
6127 })
6128
;; Whole-vector logical right shift for the SSEMODEI integer modes.  The
;; template shifts in V1TImode, so the preparation code replaces operands
;; 0 and 1 with their V1TImode lowparts before the pattern is emitted.
;; Operand 2 must satisfy const_0_to_255_mul_8_operand.
6129 (define_expand "vec_shr_<mode>"
6130 [(set (match_operand:SSEMODEI 0 "register_operand" "")
6131 (lshiftrt:V1TI
6132 (match_operand:SSEMODEI 1 "register_operand" "")
6133 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
6134 "TARGET_SSE2"
6135 {
6136 operands[0] = gen_lowpart (V1TImode, operands[0]);
6137 operands[1] = gen_lowpart (V1TImode, operands[1]);
6138 })
6139
;; AVX unsigned max/min (umaxmin iterator) for the SSEMODE124 modes:
;; three-operand vp<maxmin_int><ssevecsize>.  Operands 1 and 2 are
;; commutative ("%").  prefix_extra is 0 when operand 0 is V16QI and 1
;; for the other modes.
6140 (define_insn "*avx_<code><mode>3"
6141 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6142 (umaxmin:SSEMODE124
6143 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6144 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6145 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6146 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6147 [(set_attr "type" "sseiadd")
6148 (set (attr "prefix_extra")
6149 (if_then_else (match_operand:V16QI 0 "" "")
6150 (const_string "0")
6151 (const_string "1")))
6152 (set_attr "prefix" "vex")
6153 (set_attr "mode" "TI")])
6154
;; Expander for unsigned max/min (umaxmin iterator) on V16QI, available
;; from SSE2 on.  The preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
6155 (define_expand "<code>v16qi3"
6156 [(set (match_operand:V16QI 0 "register_operand" "")
6157 (umaxmin:V16QI
6158 (match_operand:V16QI 1 "nonimmediate_operand" "")
6159 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
6160 "TARGET_SSE2"
6161 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
6162
;; SSE2 (non-VEX) unsigned max/min on V16QI: two-operand
;; p<maxmin_int>b with operand 1 tied to the destination ("0") and
;; commutative with operand 2 ("%").
6163 (define_insn "*<code>v16qi3"
6164 [(set (match_operand:V16QI 0 "register_operand" "=x")
6165 (umaxmin:V16QI
6166 (match_operand:V16QI 1 "nonimmediate_operand" "%0")
6167 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
6168 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6169 "p<maxmin_int>b\t{%2, %0|%0, %2}"
6170 [(set_attr "type" "sseiadd")
6171 (set_attr "prefix_data16" "1")
6172 (set_attr "mode" "TI")])
6173
;; AVX signed max/min (smaxmin iterator) for the SSEMODE124 modes:
;; three-operand vp<maxmin_int><ssevecsize>.  Operands 1 and 2 are
;; commutative ("%").  prefix_extra is 0 when operand 0 is V8HI and 1
;; for the other modes.
6174 (define_insn "*avx_<code><mode>3"
6175 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6176 (smaxmin:SSEMODE124
6177 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
6178 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
6179 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6180 "vp<maxmin_int><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
6181 [(set_attr "type" "sseiadd")
6182 (set (attr "prefix_extra")
6183 (if_then_else (match_operand:V8HI 0 "" "")
6184 (const_string "0")
6185 (const_string "1")))
6186 (set_attr "prefix" "vex")
6187 (set_attr "mode" "TI")])
6188
;; Expander for signed max/min (smaxmin iterator) on V8HI, available
;; from SSE2 on.  The preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
6189 (define_expand "<code>v8hi3"
6190 [(set (match_operand:V8HI 0 "register_operand" "")
6191 (smaxmin:V8HI
6192 (match_operand:V8HI 1 "nonimmediate_operand" "")
6193 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
6194 "TARGET_SSE2"
6195 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
6196
;; SSE2 (non-VEX) signed max/min on V8HI: two-operand p<maxmin_int>w
;; with operand 1 tied to the destination ("0") and commutative with
;; operand 2 ("%").
6197 (define_insn "*<code>v8hi3"
6198 [(set (match_operand:V8HI 0 "register_operand" "=x")
6199 (smaxmin:V8HI
6200 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
6201 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
6202 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6203 "p<maxmin_int>w\t{%2, %0|%0, %2}"
6204 [(set_attr "type" "sseiadd")
6205 (set_attr "prefix_data16" "1")
6206 (set_attr "mode" "TI")])
6207
;; Unsigned maximum of two V8HI vectors.
;; With SSE4.1 the operands are fixed up and the umax pattern itself is
;; emitted.  Without it the result is built from sse2_ussubv8hi3
;; followed by addv8hi3: (op1 ussub op2) + op2.
(define_expand "umaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
                   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      rtx dest = operands[0];
      rtx rhs = operands[2];
      rtx diff = dest;

      /* Operand 2 is read again by the add, so the intermediate
         difference must not overwrite it when it is also the
         destination.  */
      if (rtx_equal_p (diff, rhs))
        diff = gen_reg_rtx (V8HImode);

      emit_insn (gen_sse2_ussubv8hi3 (diff, operands[1], rhs));
      emit_insn (gen_addv8hi3 (dest, diff, rhs));
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
})
6226
;; Signed maximum for the SSEMODE14 modes.
;; With SSE4.1 the operands are fixed up and the smax pattern itself is
;; emitted.  Without it the maximum is expanded through
;; ix86_expand_int_vcond as (op1 > op2) ? op1 : op2.
(define_expand "smax<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "")
        (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
                        (match_operand:SSEMODE14 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      rtx vcond_ops[6];
      bool expanded;

      /* Comparison and its two operands.  */
      vcond_ops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
      vcond_ops[4] = operands[1];
      vcond_ops[5] = operands[2];

      /* Destination, then the two values to select between.  */
      vcond_ops[0] = operands[0];
      vcond_ops[1] = operands[1];
      vcond_ops[2] = operands[2];

      expanded = ix86_expand_int_vcond (vcond_ops);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
})
6251
;; SSE4.1 (non-VEX) signed max/min (smaxmin iterator) for the SSEMODE14
;; modes: two-operand p<maxmin_int><ssevecsize> with operand 1 tied to
;; the destination ("0") and commutative with operand 2 ("%").
6252 (define_insn "*sse4_1_<code><mode>3"
6253 [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
6254 (smaxmin:SSEMODE14
6255 (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
6256 (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
6257 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6258 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6259 [(set_attr "type" "sseiadd")
6260 (set_attr "prefix_extra" "1")
6261 (set_attr "mode" "TI")])
6262
;; Signed maximum of two V2DI vectors (requires SSE4.2), always expanded
;; through ix86_expand_int_vcond as (op1 > op2) ? op1 : op2.
(define_expand "smaxv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE4_2"
{
  rtx vcond_ops[6];
  bool expanded;

  /* Comparison and its two operands.  */
  vcond_ops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
  vcond_ops[4] = operands[1];
  vcond_ops[5] = operands[2];

  /* Destination, then the two values to select between.  */
  vcond_ops[0] = operands[0];
  vcond_ops[1] = operands[1];
  vcond_ops[2] = operands[2];

  expanded = ix86_expand_int_vcond (vcond_ops);
  gcc_assert (expanded);
  DONE;
})
6282
;; Unsigned maximum of two V4SI vectors.
;; With SSE4.1 the operands are fixed up and the umax pattern itself is
;; emitted.  Without it the maximum is expanded through
;; ix86_expand_int_vcond as (op1 >u op2) ? op1 : op2.
(define_expand "umaxv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      rtx vcond_ops[6];
      bool expanded;

      /* Unsigned comparison and its two operands.  */
      vcond_ops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
      vcond_ops[4] = operands[1];
      vcond_ops[5] = operands[2];

      /* Destination, then the two values to select between.  */
      vcond_ops[0] = operands[0];
      vcond_ops[1] = operands[1];
      vcond_ops[2] = operands[2];

      expanded = ix86_expand_int_vcond (vcond_ops);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
})
6307
;; SSE4.1 (non-VEX) unsigned max/min (umaxmin iterator) for the
;; SSEMODE24 modes: two-operand p<maxmin_int><ssevecsize> with operand 1
;; tied to the destination ("0") and commutative with operand 2 ("%").
6308 (define_insn "*sse4_1_<code><mode>3"
6309 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
6310 (umaxmin:SSEMODE24
6311 (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
6312 (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
6313 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6314 "p<maxmin_int><ssevecsize>\t{%2, %0|%0, %2}"
6315 [(set_attr "type" "sseiadd")
6316 (set_attr "prefix_extra" "1")
6317 (set_attr "mode" "TI")])
6318
;; Unsigned maximum of two V2DI vectors (requires SSE4.2), always
;; expanded through ix86_expand_int_vcond as (op1 >u op2) ? op1 : op2.
(define_expand "umaxv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE4_2"
{
  rtx vcond_ops[6];
  bool expanded;

  /* Unsigned comparison and its two operands.  */
  vcond_ops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
  vcond_ops[4] = operands[1];
  vcond_ops[5] = operands[2];

  /* Destination, then the two values to select between.  */
  vcond_ops[0] = operands[0];
  vcond_ops[1] = operands[1];
  vcond_ops[2] = operands[2];

  expanded = ix86_expand_int_vcond (vcond_ops);
  gcc_assert (expanded);
  DONE;
})
6338
;; Signed minimum for the SSEMODE14 modes.
;; With SSE4.1 the operands are fixed up and the smin pattern itself is
;; emitted.  Without it the minimum is expanded through
;; ix86_expand_int_vcond as (op1 > op2) ? op2 : op1.
(define_expand "smin<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "")
        (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
                        (match_operand:SSEMODE14 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      rtx vcond_ops[6];
      bool expanded;

      /* Comparison and its two operands.  */
      vcond_ops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
      vcond_ops[4] = operands[1];
      vcond_ops[5] = operands[2];

      /* Destination, then the two values to select between; they are
         swapped relative to the maximum so the smaller one is taken.  */
      vcond_ops[0] = operands[0];
      vcond_ops[1] = operands[2];
      vcond_ops[2] = operands[1];

      expanded = ix86_expand_int_vcond (vcond_ops);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
})
6363
;; Signed V2DI minimum, always lowered to a conditional select through
;; ix86_expand_int_vcond.
(define_expand "sminv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE4_2"
{
  /* vcond operand layout: [0] destination, [1] value if the comparison
     holds, [2] value otherwise, [3] comparison, [4] and [5] the
     compared operands.  */
  rtx sel[6];
  bool expanded;

  /* Comparison: op1 > op2 (signed).  */
  sel[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
  sel[4] = operands[1];
  sel[5] = operands[2];

  /* Result: op2 when op1 is greater, op1 otherwise.  */
  sel[0] = operands[0];
  sel[1] = operands[2];
  sel[2] = operands[1];

  expanded = ix86_expand_int_vcond (sel);
  gcc_assert (expanded);
  DONE;
})
6383
;; Unsigned minimum for the SSEMODE24 modes.  Without SSE4.1 the
;; operation is lowered to a conditional select; with SSE4.1 the operands
;; are only fixed up and the template above is emitted as is.
(define_expand "umin<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "")
        (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
                        (match_operand:SSEMODE24 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      /* vcond operand layout: [0] destination, [1] value if the
         comparison holds, [2] value otherwise, [3] comparison,
         [4] and [5] the compared operands.  */
      rtx sel[6];
      bool expanded;

      /* Comparison: op1 >u op2.  */
      sel[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
      sel[4] = operands[1];
      sel[5] = operands[2];

      /* Result: op2 when op1 is greater, op1 otherwise.  */
      sel[0] = operands[0];
      sel[1] = operands[2];
      sel[2] = operands[1];

      expanded = ix86_expand_int_vcond (sel);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
})
6408
;; Unsigned V2DI minimum, always lowered to a conditional select through
;; ix86_expand_int_vcond.
(define_expand "uminv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE4_2"
{
  /* vcond operand layout: [0] destination, [1] value if the comparison
     holds, [2] value otherwise, [3] comparison, [4] and [5] the
     compared operands.  */
  rtx sel[6];
  bool expanded;

  /* Comparison: op1 >u op2.  */
  sel[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
  sel[4] = operands[1];
  sel[5] = operands[2];

  /* Result: op2 when op1 is greater, op1 otherwise.  */
  sel[0] = operands[0];
  sel[1] = operands[2];
  sel[2] = operands[1];

  expanded = ix86_expand_int_vcond (sel);
  gcc_assert (expanded);
  DONE;
})
6428
6429 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6430 ;;
6431 ;; Parallel integral comparisons
6432 ;;
6433 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6434
;; Element-wise equality compare for the SSEMODE124 modes.  The expander
;; only fixes up the operands; the template itself is then emitted.
(define_expand "sse2_eq<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "")
        (eq:SSEMODE124
          (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
          (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2 && !TARGET_XOP "
{
  ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);
})
6442
;; AVX three-operand equality compare for the SSEMODE1248 modes.
;; prefix_extra is "1" only when operand 0 is V2DI; every other mode
;; keeps the attribute default.
(define_insn "*avx_eq<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
        (eq:SSEMODE1248
          (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
          (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
  "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecmp")
   (set (attr "prefix_extra")
        (if_then_else (match_operand:V2DI 0 "" "")
          (const_string "1")
          (const_string "*")))
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6457
;; SSE2 two-operand equality compare for the SSEMODE124 modes; disabled
;; when XOP is enabled.  Operand 1 is tied to the destination and is
;; commutative with operand 2.
(define_insn "*sse2_eq<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (eq:SSEMODE124
          (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && !TARGET_XOP
   && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
  "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6469
;; V2DI equality compare for SSE4.1.  The expander only fixes up the
;; operands; the template itself is then emitted.
(define_expand "sse4_1_eqv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (eq:V2DI
          (match_operand:V2DI 1 "nonimmediate_operand" "")
          (match_operand:V2DI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE4_1"
{
  ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);
})
6477
;; SSE4.1 pcmpeqq: two-operand V2DI equality compare with operand 1 tied
;; to the destination and commutative with operand 2.
(define_insn "*sse4_1_eqv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (eq:V2DI
          (match_operand:V2DI 1 "nonimmediate_operand" "%0")
          (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
  "pcmpeqq\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6488
;; AVX three-operand greater-than compare for the SSEMODE1248 modes.
;; Not commutative, so operand 1 must be a register.  prefix_extra is
;; "1" only when operand 0 is V2DI.
(define_insn "*avx_gt<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
        (gt:SSEMODE1248
          (match_operand:SSEMODE1248 1 "register_operand" "x")
          (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecmp")
   (set (attr "prefix_extra")
        (if_then_else (match_operand:V2DI 0 "" "")
          (const_string "1")
          (const_string "*")))
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6503
;; SSE2 two-operand greater-than compare for the SSEMODE124 modes;
;; disabled when XOP is enabled.  Operand 1 is tied to the destination.
(define_insn "sse2_gt<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (gt:SSEMODE124
          (match_operand:SSEMODE124 1 "register_operand" "0")
          (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && !TARGET_XOP"
  "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6514
;; SSE4.2 pcmpgtq: two-operand V2DI greater-than compare with operand 1
;; tied to the destination.
(define_insn "sse4_2_gtv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (gt:V2DI
          (match_operand:V2DI 1 "register_operand" "0")
          (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_2"
  "pcmpgtq\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6525
;; Vector conditional select for the SSEMODE124C8 modes.
;; Operand 0 receives operand 1 where comparison operator 3 applied to
;; operands 4 and 5 holds, and operand 2 elsewhere.  The whole operand
;; array is passed to ix86_expand_int_vcond, which must succeed.
(define_expand "vcond<mode>"
  [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
        (if_then_else:SSEMODE124C8
          (match_operator 3 ""
            [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
             (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
          (match_operand:SSEMODE124C8 1 "general_operand" "")
          (match_operand:SSEMODE124C8 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  bool expanded;

  expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
6540
;; Counterpart of the vcond pattern under the vcondu name, with the same
;; template and the same expansion through ix86_expand_int_vcond.
;; Operand 0 receives operand 1 where comparison operator 3 applied to
;; operands 4 and 5 holds, and operand 2 elsewhere.
(define_expand "vcondu<mode>"
  [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
        (if_then_else:SSEMODE124C8
          (match_operator 3 ""
            [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
             (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
          (match_operand:SSEMODE124C8 1 "general_operand" "")
          (match_operand:SSEMODE124C8 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  bool expanded;

  expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
6555
6556 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6557 ;;
6558 ;; Parallel bitwise logical operations
6559 ;;
6560 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6561
;; One's complement of an integer vector, expressed as an XOR with an
;; all-ones constant vector that the preparation code loads into a
;; register as operand 2.
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_dup 2)))]
  "TARGET_SSE2"
{
  int nunits = GET_MODE_NUNITS (<MODE>mode);
  rtvec all_ones = rtvec_alloc (nunits);
  int elt;

  /* Every element of the constant is -1.  */
  for (elt = nunits - 1; elt >= 0; --elt)
    RTVEC_ELT (all_ones, elt) = constm1_rtx;

  operands[2]
    = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, all_ones));
})
6576
;; AND-NOT for the 32-byte integer vector modes (AVX256MODEI), emitted
;; as vandnps: operand 0 = ~operand 1 & operand 2.
(define_insn "*avx_andnot<mode>3"
  [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
        (and:AVX256MODEI
          (not:AVX256MODEI
            (match_operand:AVX256MODEI 1 "register_operand" "x"))
          (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vandnps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecpsmode>")])
6587
;; AND-NOT for the 16-byte integer vector modes when only SSE (not SSE2)
;; is enabled, emitted as andnps: operand 0 = ~operand 1 & operand 2.
(define_insn "*sse_andnot<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (and:SSEMODEI
          (not:SSEMODEI
            (match_operand:SSEMODEI 1 "register_operand" "0"))
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "(TARGET_SSE && !TARGET_SSE2)"
  "andnps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
6597
;; AND-NOT for the 16-byte integer vector modes under AVX, emitted as
;; three-operand vpandn: operand 0 = ~operand 1 & operand 2.
(define_insn "*avx_andnot<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (and:SSEMODEI
          (not:SSEMODEI
            (match_operand:SSEMODEI 1 "register_operand" "x"))
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vpandn\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6608
;; AND-NOT for the 16-byte integer vector modes under SSE2, emitted as
;; two-operand pandn with operand 1 tied to the destination:
;; operand 0 = ~operand 1 & operand 2.
(define_insn "sse2_andnot<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (and:SSEMODEI
          (not:SSEMODEI
            (match_operand:SSEMODEI 1 "register_operand" "0"))
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "pandn\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6619
;; AND-NOT on a TFmode value held in an SSE register, emitted as pandn
;; with operand 1 tied to the destination.
(define_insn "*andnottf3"
  [(set (match_operand:TF 0 "register_operand" "=x")
        (and:TF
          (not:TF (match_operand:TF 1 "register_operand" "0"))
          (match_operand:TF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "pandn\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6630
;; Bitwise logic (every code in the any_logic iterator) on the 16-byte
;; integer vector modes.  The expander only fixes up the operands; the
;; template itself is then emitted.
(define_expand "<code><mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (any_logic:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
6638
;; Bitwise logic on the 32-byte integer vector modes (AVX256MODEI),
;; emitted through the packed-single form v<logic>ps.  Operands 1 and 2
;; are commutative.
(define_insn "*avx_<code><mode>3"
  [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
        (any_logic:AVX256MODEI
          (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
          (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecpsmode>")])
6650
;; Bitwise logic on the 16-byte integer vector modes when only SSE (not
;; SSE2) is enabled, emitted through the packed-single form <logic>ps.
(define_insn "*sse_<code><mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (any_logic:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "(TARGET_SSE && !TARGET_SSE2)
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "<logic>ps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
6661
;; Bitwise logic on the 16-byte integer vector modes under AVX, emitted
;; as three-operand vp<logic>.  Operands 1 and 2 are commutative.
(define_insn "*avx_<code><mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (any_logic:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "vp<logic>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6673
;; Bitwise logic on the 16-byte integer vector modes under SSE2, emitted
;; as two-operand p<logic> with operand 1 tied to the destination and
;; commutative with operand 2.
(define_insn "*sse2_<code><mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (any_logic:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "p<logic>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6684
;; Bitwise logic (every code in the any_logic iterator) on TFmode
;; values.  The expander only fixes up the operands; the template itself
;; is then emitted.
(define_expand "<code>tf3"
  [(set (match_operand:TF 0 "register_operand" "")
        (any_logic:TF
          (match_operand:TF 1 "nonimmediate_operand" "")
          (match_operand:TF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);
})
6692
;; Bitwise logic on a TFmode value held in an SSE register, emitted as
;; two-operand p<logic> with operand 1 tied to the destination and
;; commutative with operand 2.
(define_insn "*<code>tf3"
  [(set (match_operand:TF 0 "register_operand" "=x")
        (any_logic:TF
          (match_operand:TF 1 "nonimmediate_operand" "%0")
          (match_operand:TF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
  "p<logic>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6703
6704 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6705 ;;
6706 ;; Parallel integral element swizzling
6707 ;;
6708 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6709
;; Pack two V8HI inputs into one V16QI result.  Both inputs are viewed
;; as V16QI and passed to ix86_expand_vec_extract_even_odd with a final
;; argument of 0.
(define_expand "vec_pack_trunc_v8hi"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx first_as_qi = gen_lowpart (V16QImode, operands[1]);
  rtx second_as_qi = gen_lowpart (V16QImode, operands[2]);

  ix86_expand_vec_extract_even_odd (operands[0], first_as_qi,
                                    second_as_qi, 0);
  DONE;
})
6721
;; Pack two V4SI inputs into one V8HI result.  Both inputs are viewed as
;; V8HI and passed to ix86_expand_vec_extract_even_odd with a final
;; argument of 0.
(define_expand "vec_pack_trunc_v4si"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx first_as_hi = gen_lowpart (V8HImode, operands[1]);
  rtx second_as_hi = gen_lowpart (V8HImode, operands[2]);

  ix86_expand_vec_extract_even_odd (operands[0], first_as_hi,
                                    second_as_hi, 0);
  DONE;
})
6733
;; Pack two V2DI inputs into one V4SI result.  Both inputs are viewed as
;; V4SI and passed to ix86_expand_vec_extract_even_odd with a final
;; argument of 0.
(define_expand "vec_pack_trunc_v2di"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DI 1 "register_operand" "")
   (match_operand:V2DI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx first_as_si = gen_lowpart (V4SImode, operands[1]);
  rtx second_as_si = gen_lowpart (V4SImode, operands[2]);

  ix86_expand_vec_extract_even_odd (operands[0], first_as_si,
                                    second_as_si, 0);
  DONE;
})
6745
;; AVX vpacksswb: signed-saturating truncation of two V8HI inputs to
;; V8QI each, concatenated into a V16QI result (operand 1 first).
(define_insn "*avx_packsswb"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_concat:V16QI
          (ss_truncate:V8QI
            (match_operand:V8HI 1 "register_operand" "x"))
          (ss_truncate:V8QI
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_AVX"
  "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6758
;; SSE2 packsswb: signed-saturating truncation of two V8HI inputs to
;; V8QI each, concatenated into a V16QI result.  Operand 1 is tied to
;; the destination.
(define_insn "sse2_packsswb"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_concat:V16QI
          (ss_truncate:V8QI
            (match_operand:V8HI 1 "register_operand" "0"))
          (ss_truncate:V8QI
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE2"
  "packsswb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6771
;; AVX vpackssdw: signed-saturating truncation of two V4SI inputs to
;; V4HI each, concatenated into a V8HI result (operand 1 first).
(define_insn "*avx_packssdw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (ss_truncate:V4HI
            (match_operand:V4SI 1 "register_operand" "x"))
          (ss_truncate:V4HI
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_AVX"
  "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6784
;; SSE2 packssdw: signed-saturating truncation of two V4SI inputs to
;; V4HI each, concatenated into a V8HI result.  Operand 1 is tied to the
;; destination.
(define_insn "sse2_packssdw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (ss_truncate:V4HI
            (match_operand:V4SI 1 "register_operand" "0"))
          (ss_truncate:V4HI
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE2"
  "packssdw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6797
;; AVX vpackuswb: unsigned-saturating truncation of two V8HI inputs to
;; V8QI each, concatenated into a V16QI result (operand 1 first).
(define_insn "*avx_packuswb"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_concat:V16QI
          (us_truncate:V8QI
            (match_operand:V8HI 1 "register_operand" "x"))
          (us_truncate:V8QI
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_AVX"
  "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6810
;; SSE2 packuswb: unsigned-saturating truncation of two V8HI inputs to
;; V8QI each, concatenated into a V16QI result.  Operand 1 is tied to
;; the destination.
(define_insn "sse2_packuswb"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_concat:V16QI
          (us_truncate:V8QI
            (match_operand:V8HI 1 "register_operand" "0"))
          (us_truncate:V8QI
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE2"
  "packuswb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6823
;; AVX vpunpckhbw: interleave bytes 8..15 of operand 1 with bytes 8..15
;; of operand 2 (indices 24..31 of the concatenation).
(define_insn "*avx_interleave_highv16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_select:V16QI
          (vec_concat:V32QI
            (match_operand:V16QI 1 "register_operand" "x")
            (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 8)  (const_int 24)
                     (const_int 9)  (const_int 25)
                     (const_int 10) (const_int 26)
                     (const_int 11) (const_int 27)
                     (const_int 12) (const_int 28)
                     (const_int 13) (const_int 29)
                     (const_int 14) (const_int 30)
                     (const_int 15) (const_int 31)])))]
  "TARGET_AVX"
  "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6843
;; SSE2 punpckhbw: interleave bytes 8..15 of operand 1 with bytes 8..15
;; of operand 2 (indices 24..31 of the concatenation).  Operand 1 is
;; tied to the destination.
(define_insn "vec_interleave_highv16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_select:V16QI
          (vec_concat:V32QI
            (match_operand:V16QI 1 "register_operand" "0")
            (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 8)  (const_int 24)
                     (const_int 9)  (const_int 25)
                     (const_int 10) (const_int 26)
                     (const_int 11) (const_int 27)
                     (const_int 12) (const_int 28)
                     (const_int 13) (const_int 29)
                     (const_int 14) (const_int 30)
                     (const_int 15) (const_int 31)])))]
  "TARGET_SSE2"
  "punpckhbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6863
;; AVX vpunpcklbw: interleave bytes 0..7 of operand 1 with bytes 0..7 of
;; operand 2 (indices 16..23 of the concatenation).
(define_insn "*avx_interleave_lowv16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_select:V16QI
          (vec_concat:V32QI
            (match_operand:V16QI 1 "register_operand" "x")
            (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 16)
                     (const_int 1) (const_int 17)
                     (const_int 2) (const_int 18)
                     (const_int 3) (const_int 19)
                     (const_int 4) (const_int 20)
                     (const_int 5) (const_int 21)
                     (const_int 6) (const_int 22)
                     (const_int 7) (const_int 23)])))]
  "TARGET_AVX"
  "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6883
;; SSE2 punpcklbw: interleave bytes 0..7 of operand 1 with bytes 0..7 of
;; operand 2 (indices 16..23 of the concatenation).  Operand 1 is tied
;; to the destination.
(define_insn "vec_interleave_lowv16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_select:V16QI
          (vec_concat:V32QI
            (match_operand:V16QI 1 "register_operand" "0")
            (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 16)
                     (const_int 1) (const_int 17)
                     (const_int 2) (const_int 18)
                     (const_int 3) (const_int 19)
                     (const_int 4) (const_int 20)
                     (const_int 5) (const_int 21)
                     (const_int 6) (const_int 22)
                     (const_int 7) (const_int 23)])))]
  "TARGET_SSE2"
  "punpcklbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6903
;; AVX vpunpckhwd: interleave words 4..7 of operand 1 with words 4..7 of
;; operand 2 (indices 12..15 of the concatenation).
(define_insn "*avx_interleave_highv8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (vec_concat:V16HI
            (match_operand:V8HI 1 "register_operand" "x")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))]
  "TARGET_AVX"
  "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6919
;; SSE2 punpckhwd: interleave words 4..7 of operand 1 with words 4..7 of
;; operand 2 (indices 12..15 of the concatenation).  Operand 1 is tied
;; to the destination.
(define_insn "vec_interleave_highv8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (vec_concat:V16HI
            (match_operand:V8HI 1 "register_operand" "0")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))]
  "TARGET_SSE2"
  "punpckhwd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6935
;; AVX vpunpcklwd: interleave words 0..3 of operand 1 with words 0..3 of
;; operand 2 (indices 8..11 of the concatenation).
(define_insn "*avx_interleave_lowv8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (vec_concat:V16HI
            (match_operand:V8HI 1 "register_operand" "x")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)])))]
  "TARGET_AVX"
  "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6951
;; SSE2 punpcklwd: interleave words 0..3 of operand 1 with words 0..3 of
;; operand 2 (indices 8..11 of the concatenation).  Operand 1 is tied to
;; the destination.
(define_insn "vec_interleave_lowv8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (vec_concat:V16HI
            (match_operand:V8HI 1 "register_operand" "0")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)])))]
  "TARGET_SSE2"
  "punpcklwd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6967
;; AVX vpunpckhdq: interleave dwords 2..3 of operand 1 with dwords 2..3
;; of operand 2 (indices 6..7 of the concatenation).
(define_insn "*avx_interleave_highv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (vec_concat:V8SI
            (match_operand:V4SI 1 "register_operand" "x")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 2) (const_int 6)
                     (const_int 3) (const_int 7)])))]
  "TARGET_AVX"
  "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6981
;; SSE2 punpckhdq: interleave dwords 2..3 of operand 1 with dwords 2..3
;; of operand 2 (indices 6..7 of the concatenation).  Operand 1 is tied
;; to the destination.
(define_insn "vec_interleave_highv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (vec_concat:V8SI
            (match_operand:V4SI 1 "register_operand" "0")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 2) (const_int 6)
                     (const_int 3) (const_int 7)])))]
  "TARGET_SSE2"
  "punpckhdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6995
;; AVX vpunpckldq: interleave dwords 0..1 of operand 1 with dwords 0..1
;; of operand 2 (indices 4..5 of the concatenation).
(define_insn "*avx_interleave_lowv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (vec_concat:V8SI
            (match_operand:V4SI 1 "register_operand" "x")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 1) (const_int 5)])))]
  "TARGET_AVX"
  "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
7009
;; SSE2 punpckldq: interleave dwords 0..1 of operand 1 with dwords 0..1
;; of operand 2 (indices 4..5 of the concatenation).  Operand 1 is tied
;; to the destination.
(define_insn "vec_interleave_lowv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (vec_concat:V8SI
            (match_operand:V4SI 1 "register_operand" "0")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 1) (const_int 5)])))]
  "TARGET_SSE2"
  "punpckldq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
7023
;; AVX element insert for the SSEMODE124 modes.  Operand 3 is a
;; power-of-two vec_merge mask; the output code replaces it with its
;; log2 before printing it as the immediate.  A register source is
;; printed with the %k modifier, a memory source without it.
;; prefix_extra is "0" for V8HI and "1" for the other modes.
(define_insn "*avx_pinsr<ssevecsize>"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (vec_merge:SSEMODE124
          (vec_duplicate:SSEMODE124
            (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
          (match_operand:SSEMODE124 1 "register_operand" "x")
          (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
  "TARGET_AVX"
{
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
  return (MEM_P (operands[2])
          ? "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
          : "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}");
}
  [(set_attr "type" "sselog")
   (set (attr "prefix_extra")
        (if_then_else (match_operand:V8HI 0 "" "")
          (const_string "0")
          (const_string "1")))
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
7047
;; SSE4.1 pinsrb: insert a QImode value into one byte of a V16QI vector.
;; Operand 3 is a power-of-two vec_merge mask; the output code replaces
;; it with its log2 before printing it as the immediate.  A register
;; source is printed with the %k modifier, a memory source without it.
(define_insn "*sse4_1_pinsrb"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_merge:V16QI
          (vec_duplicate:V16QI
            (match_operand:QI 2 "nonimmediate_operand" "rm"))
          (match_operand:V16QI 1 "register_operand" "0")
          (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
  "TARGET_SSE4_1"
{
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
  return (MEM_P (operands[2])
          ? "pinsrb\t{%3, %2, %0|%0, %2, %3}"
          : "pinsrb\t{%3, %k2, %0|%0, %k2, %3}");
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
7067
;; SSE2 pinsrw: insert an HImode value into one word of a V8HI vector.
;; Operand 3 is a power-of-two vec_merge mask; the output code replaces
;; it with its log2 before printing it as the immediate.  A register
;; source is printed with the %k modifier, a memory source without it.
(define_insn "*sse2_pinsrw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_merge:V8HI
          (vec_duplicate:V8HI
            (match_operand:HI 2 "nonimmediate_operand" "rm"))
          (match_operand:V8HI 1 "register_operand" "0")
          (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
  "TARGET_SSE2"
{
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
  return (MEM_P (operands[2])
          ? "pinsrw\t{%3, %2, %0|%0, %2, %3}"
          : "pinsrw\t{%3, %k2, %0|%0, %k2, %3}");
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
7087
;; SSE4.1 pinsrd: insert an SImode value into one dword of a V4SI
;; vector.  Operand 3 is a power-of-two vec_merge mask; the output code
;; replaces it with its log2 before printing it as the immediate.
;; This pattern must come before sse2_loadld since it is preferred.
(define_insn "*sse4_1_pinsrd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_merge:V4SI
          (vec_duplicate:V4SI
            (match_operand:SI 2 "nonimmediate_operand" "rm"))
          (match_operand:V4SI 1 "register_operand" "0")
          (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
  "TARGET_SSE4_1"
{
  /* Turn the one-bit merge mask into an element index.  */
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
  return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
7105
;; AVX vpinsrq (64-bit targets only): insert a DImode value into one
;; qword of a V2DI vector.  Operand 3 is a power-of-two vec_merge mask;
;; the output code replaces it with its log2 before printing it as the
;; immediate.
(define_insn "*avx_pinsrq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_merge:V2DI
          (vec_duplicate:V2DI
            (match_operand:DI 2 "nonimmediate_operand" "rm"))
          (match_operand:V2DI 1 "register_operand" "x")
          (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
  "TARGET_AVX && TARGET_64BIT"
{
  /* Turn the one-bit merge mask into an element index.  */
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
  return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
7123
;; SSE4.1 pinsrq (64-bit targets only): insert a DImode value into one
;; qword of a V2DI vector, with operand 1 tied to the destination.
;; Operand 3 is a power-of-two vec_merge mask; the output code replaces
;; it with its log2 before printing it as the immediate.
(define_insn "*sse4_1_pinsrq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_merge:V2DI
          (vec_duplicate:V2DI
            (match_operand:DI 2 "nonimmediate_operand" "rm"))
          (match_operand:V2DI 1 "register_operand" "0")
          (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
  "TARGET_SSE4_1 && TARGET_64BIT"
{
  /* Turn the one-bit merge mask into an element index.  */
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
  return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_rex" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
7141
;; SSE4.1 pextrb into a general register: extract byte number operand 2
;; (0..15) of a V16QI vector, zero-extended to each SWI48 mode.  The
;; destination is printed with the %k modifier.
(define_insn "*sse4_1_pextrb_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (zero_extend:SWI48
          (vec_select:QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
  "TARGET_SSE4_1"
  "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7155
;; SSE4.1 pextrb straight to memory: store byte number operand 2 (0..15)
;; of a V16QI vector into a QImode memory location.
(define_insn "*sse4_1_pextrb_memory"
  [(set (match_operand:QI 0 "memory_operand" "=m")
        (vec_select:QI
          (match_operand:V16QI 1 "register_operand" "x")
          (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
  "TARGET_SSE4_1"
  "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7168
;; SSE2 pextrw into a general register: extract word number operand 2
;; (0..7) of a V8HI vector, zero-extended to each SWI48 mode.  The
;; destination is printed with the %k modifier.
(define_insn "*sse2_pextrw_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (zero_extend:SWI48
          (vec_select:HI
            (match_operand:V8HI 1 "register_operand" "x")
            (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
  "TARGET_SSE2"
  "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7182
;; SSE4.1 pextrw straight to memory: store word number operand 2 (0..7)
;; of a V8HI vector into an HImode memory location.
(define_insn "*sse4_1_pextrw_memory"
  [(set (match_operand:HI 0 "memory_operand" "=m")
        (vec_select:HI
          (match_operand:V8HI 1 "register_operand" "x")
          (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
  "TARGET_SSE4_1"
  "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7195
;; SSE4.1 pextrd: extract dword number operand 2 (0..3) of a V4SI vector
;; into an SImode register or memory location.
(define_insn "*sse4_1_pextrd"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
        (vec_select:SI
          (match_operand:V4SI 1 "register_operand" "x")
          (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
  "TARGET_SSE4_1"
  "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7208
;; SSE4.1 pextrd with zero extension to DImode (64-bit targets only):
;; extract dword number operand 2 (0..3) of a V4SI vector.  The
;; destination is printed with the %k modifier.
(define_insn "*sse4_1_pextrd_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (vec_select:SI
            (match_operand:V4SI 1 "register_operand" "x")
            (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
  "TARGET_64BIT && TARGET_SSE4_1"
  "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7222
;; SSE4.1 pextrq (64-bit targets only): extract qword number operand 2
;; (0..1) of a V2DI vector into a DImode register or memory location.
;; This pattern must come before *vec_extractv2di_1_sse since it is
;; preferred.
(define_insn "*sse4_1_pextrq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "x")
          (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
  "TARGET_SSE4_1 && TARGET_64BIT"
  "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_rex" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7237
;; pshufd with an 8-bit immediate.  The immediate in operand 2 is split
;; into four 2-bit element selectors, lowest bits first, which are
;; passed to sse2_pshufd_1.
(define_expand "sse2_pshufd"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V4SI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm8 = INTVAL (operands[2]);
  rtx sel0 = GEN_INT ((imm8 >> 0) & 3);
  rtx sel1 = GEN_INT ((imm8 >> 2) & 3);
  rtx sel2 = GEN_INT ((imm8 >> 4) & 3);
  rtx sel3 = GEN_INT ((imm8 >> 6) & 3);

  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
                                sel0, sel1, sel2, sel3));
  DONE;
})
7252
;; pshufd expressed as a vec_select with four element selectors
;; (operands 2..5, each 0..3).  The output code packs them, two bits
;; each with operand 2 lowest, into one immediate that overwrites
;; operand 2 before printing.
(define_insn "sse2_pshufd_1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  int imm8 = ((INTVAL (operands[2]) << 0)
              | (INTVAL (operands[3]) << 2)
              | (INTVAL (operands[4]) << 4)
              | (INTVAL (operands[5]) << 6));

  operands[2] = GEN_INT (imm8);
  return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
7277
;; Expander for a shuffle of the low four V8HI elements driven by a single
;; integer immediate (operand 2).  The immediate is split into four 2-bit
;; selectors which are handed to sse2_pshuflw_1.
7278 (define_expand "sse2_pshuflw"
7279 [(match_operand:V8HI 0 "register_operand" "")
7280 (match_operand:V8HI 1 "nonimmediate_operand" "")
7281 (match_operand:SI 2 "const_int_operand" "")]
7282 "TARGET_SSE2"
7283 {
7284 int mask = INTVAL (operands[2]);
7285 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7286 GEN_INT ((mask >> 0) & 3),
7287 GEN_INT ((mask >> 2) & 3),
7288 GEN_INT ((mask >> 4) & 3),
7289 GEN_INT ((mask >> 6) & 3)));
7290 DONE;
7291 })
7292
;; V8HI permutation of the low half only: result elements 0..3 are chosen by
;; operands 2..5 (each 0..3), while elements 4..7 are fixed to source
;; elements 4..7.  The output code packs the four selectors into one
;; immediate (two bits each, operand 2 lowest) and stores it in operands[2]
;; for the template.
7293 (define_insn "sse2_pshuflw_1"
7294 [(set (match_operand:V8HI 0 "register_operand" "=x")
7295 (vec_select:V8HI
7296 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7297 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7298 (match_operand 3 "const_0_to_3_operand" "")
7299 (match_operand 4 "const_0_to_3_operand" "")
7300 (match_operand 5 "const_0_to_3_operand" "")
7301 (const_int 4)
7302 (const_int 5)
7303 (const_int 6)
7304 (const_int 7)])))]
7305 "TARGET_SSE2"
7306 {
7307 int mask = 0;
7308 mask |= INTVAL (operands[2]) << 0;
7309 mask |= INTVAL (operands[3]) << 2;
7310 mask |= INTVAL (operands[4]) << 4;
7311 mask |= INTVAL (operands[5]) << 6;
7312 operands[2] = GEN_INT (mask);
7313
7314 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7315 }
7316 [(set_attr "type" "sselog")
7317 (set_attr "prefix_data16" "0")
7318 (set_attr "prefix_rep" "1")
7319 (set_attr "prefix" "maybe_vex")
7320 (set_attr "length_immediate" "1")
7321 (set_attr "mode" "TI")])
7322
;; Expander for a shuffle of the high four V8HI elements driven by a single
;; integer immediate (operand 2).  Each 2-bit field of the immediate is
;; biased by 4 so that the selectors passed to sse2_pshufhw_1 are absolute
;; element numbers in the range 4..7.
7323 (define_expand "sse2_pshufhw"
7324 [(match_operand:V8HI 0 "register_operand" "")
7325 (match_operand:V8HI 1 "nonimmediate_operand" "")
7326 (match_operand:SI 2 "const_int_operand" "")]
7327 "TARGET_SSE2"
7328 {
7329 int mask = INTVAL (operands[2]);
7330 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7331 GEN_INT (((mask >> 0) & 3) + 4),
7332 GEN_INT (((mask >> 2) & 3) + 4),
7333 GEN_INT (((mask >> 4) & 3) + 4),
7334 GEN_INT (((mask >> 6) & 3) + 4)));
7335 DONE;
7336 })
7337
;; V8HI permutation of the high half only: result elements 0..3 are fixed to
;; source elements 0..3, while elements 4..7 are chosen by operands 2..5
;; (each accepted by const_4_to_7_operand).  The output code removes the
;; bias of 4 from every selector, packs them two bits each (operand 2
;; lowest) and stores the immediate in operands[2] for the template.
7338 (define_insn "sse2_pshufhw_1"
7339 [(set (match_operand:V8HI 0 "register_operand" "=x")
7340 (vec_select:V8HI
7341 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7342 (parallel [(const_int 0)
7343 (const_int 1)
7344 (const_int 2)
7345 (const_int 3)
7346 (match_operand 2 "const_4_to_7_operand" "")
7347 (match_operand 3 "const_4_to_7_operand" "")
7348 (match_operand 4 "const_4_to_7_operand" "")
7349 (match_operand 5 "const_4_to_7_operand" "")])))]
7350 "TARGET_SSE2"
7351 {
7352 int mask = 0;
7353 mask |= (INTVAL (operands[2]) - 4) << 0;
7354 mask |= (INTVAL (operands[3]) - 4) << 2;
7355 mask |= (INTVAL (operands[4]) - 4) << 4;
7356 mask |= (INTVAL (operands[5]) - 4) << 6;
7357 operands[2] = GEN_INT (mask);
7358
7359 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7360 }
7361 [(set_attr "type" "sselog")
7362 (set_attr "prefix_rep" "1")
7363 (set_attr "prefix_data16" "0")
7364 (set_attr "prefix" "maybe_vex")
7365 (set_attr "length_immediate" "1")
7366 (set_attr "mode" "TI")])
7367
;; Expander: build a V4SI whose element 0 is the SImode operand 1 and whose
;; remaining elements come from operand 2.  The preparation statement sets
;; operand 2 to the V4SImode zero constant, so the other elements are zero.
;; Note the condition is TARGET_SSE even though the name says sse2.
7368 (define_expand "sse2_loadd"
7369 [(set (match_operand:V4SI 0 "register_operand" "")
7370 (vec_merge:V4SI
7371 (vec_duplicate:V4SI
7372 (match_operand:SI 1 "nonimmediate_operand" ""))
7373 (match_dup 2)
7374 (const_int 1)))]
7375 "TARGET_SSE"
7376 "operands[2] = CONST0_RTX (V4SImode);")
7377
;; AVX form of inserting an SImode value (operand 2) into element 0 of a
;; V4SI, taking the other elements from operand 1.
;;   alt 0: value in memory,          operand 1 constrained to "C" -> vmovd
;;   alt 1: value in a general reg,   operand 1 constrained to "C" -> vmovd
;;   alt 2: value in an SSE register, operand 1 an SSE register   -> vmovss
;; ("C" is presumably the constant-zero constraint, matching the
;; reg_or_0_operand predicate -- confirm in constraints.md.)
7378 (define_insn "*avx_loadld"
7379 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
7380 (vec_merge:V4SI
7381 (vec_duplicate:V4SI
7382 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
7383 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
7384 (const_int 1)))]
7385 "TARGET_AVX"
7386 "@
7387 vmovd\t{%2, %0|%0, %2}
7388 vmovd\t{%2, %0|%0, %2}
7389 vmovss\t{%2, %1, %0|%0, %1, %2}"
7390 [(set_attr "type" "ssemov")
7391 (set_attr "prefix" "vex")
7392 (set_attr "mode" "TI,TI,V4SF")])
7393
;; Non-AVX form of inserting an SImode value (operand 2) into element 0 of a
;; V4SI, taking the other elements from operand 1.
;;   alt 0: memory source,      operand 1 "C", destination "Y2" -> movd
;;   alt 1: general-reg source, operand 1 "C", destination "Yi" -> movd
;;   alt 2: memory source,      operand 1 "C", any SSE reg      -> movss
;;   alt 3: SSE-reg source, operand 1 tied to the destination   -> movss
;; The condition is only TARGET_SSE; the movd alternatives are restricted
;; through the Y2/Yi register constraints (presumably SSE2-only and
;; inter-unit-move classes -- confirm in constraints.md).
7394 (define_insn "sse2_loadld"
7395 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
7396 (vec_merge:V4SI
7397 (vec_duplicate:V4SI
7398 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
7399 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
7400 (const_int 1)))]
7401 "TARGET_SSE"
7402 "@
7403 movd\t{%2, %0|%0, %2}
7404 movd\t{%2, %0|%0, %2}
7405 movss\t{%2, %0|%0, %2}
7406 movss\t{%2, %0|%0, %2}"
7407 [(set_attr "type" "ssemov")
7408 (set_attr "mode" "TI,TI,V4SF,SF")])
7409
;; Store element 0 of a V4SI register into an SImode destination.  The insn
;; is always output as "#" and therefore has to be split: after reload it
;; becomes a plain SImode move whose source is the same hard register
;; re-expressed in SImode (same REGNO).  The split is allowed only when
;; inter-unit moves are enabled, the destination is memory, or the
;; destination is not a general register.
7410 (define_insn_and_split "sse2_stored"
7411 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
7412 (vec_select:SI
7413 (match_operand:V4SI 1 "register_operand" "x,Yi")
7414 (parallel [(const_int 0)])))]
7415 "TARGET_SSE"
7416 "#"
7417 "&& reload_completed
7418 && (TARGET_INTER_UNIT_MOVES
7419 || MEM_P (operands [0])
7420 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7421 [(set (match_dup 0) (match_dup 1))]
7422 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
7423
;; Extract element operand 2 (0..3) of a V4SI that lives in offsettable
;; memory into a general register.  Output as "#" and split after reload
;; into an ordinary SImode load from the vector's address advanced by
;; 4 * index bytes.  The insn condition is the empty string, so the pattern
;; is not gated on any TARGET_ flag.
7424 (define_insn_and_split "*vec_ext_v4si_mem"
7425 [(set (match_operand:SI 0 "register_operand" "=r")
7426 (vec_select:SI
7427 (match_operand:V4SI 1 "memory_operand" "o")
7428 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7429 ""
7430 "#"
7431 "reload_completed"
7432 [(const_int 0)]
7433 {
7434 int i = INTVAL (operands[2]);
7435
7436 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
7437 DONE;
7438 })
7439
;; Expander: store element 0 of a V2DI register into a DImode destination.
;; It has no preparation code; it simply generates the vec_select RTL, which
;; is then expected to match one of the *sse2_storeq insn patterns.
7440 (define_expand "sse_storeq"
7441 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7442 (vec_select:DI
7443 (match_operand:V2DI 1 "register_operand" "")
7444 (parallel [(const_int 0)])))]
7445 "TARGET_SSE")
7446
;; Store element 0 of a V2DI operand into a DImode destination on 64-bit
;; targets.
;;   alt 0: SSE register -> memory or SSE register   ("#", needs a split)
;;   alt 1: SSE register (Yi) -> general register    ("#", needs a split)
;;   alt 2: offsettable memory -> general register, i.e. a plain 64-bit
;;          integer load of the low half of the vector.
;; Alternative 2 touches no SSE register, so it must be printed as an
;; ordinary mov{q}.  With the %v modifier an AVX build would print
;; vmov{q} with a memory source and a general-register destination, and
;; no VEX-encoded move of that shape exists.  Its "prefix" attribute is
;; "orig" for the same reason.
7447 (define_insn "*sse2_storeq_rex64"
7448 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
7449 (vec_select:DI
7450 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7451 (parallel [(const_int 0)])))]
7452 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7453 "@
7454 #
7455 #
7456 mov{q}\t{%1, %0|%0, %1}"
7457 [(set_attr "type" "*,*,imov")
7458 (set_attr "prefix" "*,*,orig")
7459 (set_attr "mode" "*,*,DI")])
7460
;; Store element 0 of a V2DI SSE register into memory or another SSE
;; register.  The template is "#", so the insn is never printed directly; a
;; define_split for the same vec_select RTL has to rewrite it into a DImode
;; move after reload.
7461 (define_insn "*sse2_storeq"
7462 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
7463 (vec_select:DI
7464 (match_operand:V2DI 1 "register_operand" "x")
7465 (parallel [(const_int 0)])))]
7466 "TARGET_SSE"
7467 "#")
7468
;; Post-reload splitter for "store element 0 of a V2DI register": replace
;; the vec_select with a plain DImode move from the same hard register
;; re-expressed in DImode (same REGNO).  It applies only when inter-unit
;; moves are enabled, the destination is memory, or the destination is not a
;; general register.
7469 (define_split
7470 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7471 (vec_select:DI
7472 (match_operand:V2DI 1 "register_operand" "")
7473 (parallel [(const_int 0)])))]
7474 "TARGET_SSE
7475 && reload_completed
7476 && (TARGET_INTER_UNIT_MOVES
7477 || MEM_P (operands [0])
7478 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7479 [(set (match_dup 0) (match_dup 1))]
7480 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
7481
;; Extract element 1 (the high DImode half) of a V2DI operand, 64-bit AVX
;; version.
;;   alt 0: SSE register -> memory                     vmovhps
;;   alt 1: SSE register -> SSE register               vpsrldq by 8 bytes
;;   alt 2: offsettable memory -> SSE register         vmovq from %H1
;;   alt 3: offsettable memory -> general register     mov{q} from %H1
;; (%H1 presumably addresses the upper 8 bytes of memory operand 1 --
;; confirm against print_operand in i386.c.)
;; Alternative 3 is a pure integer load with no SSE register involved, so it
;; is printed as a plain mov{q} and carries prefix "orig": there is no
;; VEX-encoded move from memory to a general register, and "vmov{q}" would
;; not assemble.
7482 (define_insn "*vec_extractv2di_1_rex64_avx"
7483 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7484 (vec_select:DI
7485 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
7486 (parallel [(const_int 1)])))]
7487 "TARGET_64BIT
7488 && TARGET_AVX
7489 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7490 "@
7491 vmovhps\t{%1, %0|%0, %1}
7492 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7493 vmovq\t{%H1, %0|%0, %H1}
7494 mov{q}\t{%H1, %0|%0, %H1}"
7495 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7496 (set_attr "length_immediate" "*,1,*,*")
7497 (set_attr "memory" "*,none,*,*")
7498 (set_attr "prefix" "vex,vex,vex,orig")
7499 (set_attr "mode" "V2SF,TI,TI,DI")])
7500
;; Extract element 1 (the high DImode half) of a V2DI operand, 64-bit
;; non-AVX version.
;;   alt 0: SSE register -> memory                     movhps
;;   alt 1: in place (operand 1 tied to operand 0)     psrldq by 8 bytes
;;   alt 2: offsettable memory -> SSE register         movq from %H1
;;   alt 3: offsettable memory -> general register     mov{q} from %H1
;; The condition tests only TARGET_64BIT and rejects memory-to-memory.
7501 (define_insn "*vec_extractv2di_1_rex64"
7502 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
7503 (vec_select:DI
7504 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
7505 (parallel [(const_int 1)])))]
7506 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7507 "@
7508 movhps\t{%1, %0|%0, %1}
7509 psrldq\t{$8, %0|%0, 8}
7510 movq\t{%H1, %0|%0, %H1}
7511 mov{q}\t{%H1, %0|%0, %H1}"
7512 [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
7513 (set_attr "length_immediate" "*,1,*,*")
7514 (set_attr "memory" "*,none,*,*")
7515 (set_attr "mode" "V2SF,TI,TI,DI")])
7516
;; Extract element 1 (the high DImode half) of a V2DI operand, 32-bit AVX
;; version.  Same as the rex64 AVX pattern minus the general-register
;; alternative:
;;   alt 0: SSE register -> memory                     vmovhps
;;   alt 1: SSE register -> SSE register               vpsrldq by 8 bytes
;;   alt 2: offsettable memory -> SSE register         vmovq from %H1
7517 (define_insn "*vec_extractv2di_1_avx"
7518 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7519 (vec_select:DI
7520 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7521 (parallel [(const_int 1)])))]
7522 "!TARGET_64BIT
7523 && TARGET_AVX
7524 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7525 "@
7526 vmovhps\t{%1, %0|%0, %1}
7527 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7528 vmovq\t{%H1, %0|%0, %H1}"
7529 [(set_attr "type" "ssemov,sseishft1,ssemov")
7530 (set_attr "length_immediate" "*,1,*")
7531 (set_attr "memory" "*,none,*")
7532 (set_attr "prefix" "vex")
7533 (set_attr "mode" "V2SF,TI,TI")])
7534
;; Extract element 1 (the high DImode half) of a V2DI operand, 32-bit SSE2
;; version.
;;   alt 0: SSE register -> memory                     movhps
;;   alt 1: in place (operand 1 tied to operand 0)     psrldq by 8 bytes
;;   alt 2: offsettable memory -> SSE register         movq from %H1
7535 (define_insn "*vec_extractv2di_1_sse2"
7536 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7537 (vec_select:DI
7538 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
7539 (parallel [(const_int 1)])))]
7540 "!TARGET_64BIT
7541 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7542 "@
7543 movhps\t{%1, %0|%0, %1}
7544 psrldq\t{$8, %0|%0, 8}
7545 movq\t{%H1, %0|%0, %H1}"
7546 [(set_attr "type" "ssemov,sseishft1,ssemov")
7547 (set_attr "length_immediate" "*,1,*")
7548 (set_attr "memory" "*,none,*")
7549 (set_attr "mode" "V2SF,TI,TI")])
7550
7551 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; Extract element 1 (the high DImode half) of a V2DI operand when only SSE1
;; is available (condition: !TARGET_SSE2 && TARGET_SSE), using
;; single-precision move instructions:
;;   alt 0: SSE register -> memory                     movhps
;;   alt 1: SSE register -> SSE register               movhlps
;;   alt 2: offsettable memory -> SSE register         movlps from %H1
7552 (define_insn "*vec_extractv2di_1_sse"
7553 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7554 (vec_select:DI
7555 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
7556 (parallel [(const_int 1)])))]
7557 "!TARGET_SSE2 && TARGET_SSE
7558 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7559 "@
7560 movhps\t{%1, %0|%0, %1}
7561 movhlps\t{%1, %0|%0, %1}
7562 movlps\t{%H1, %0|%0, %H1}"
7563 [(set_attr "type" "ssemov")
7564 (set_attr "mode" "V2SF,V4SF,V2SF")])
7565
;; Broadcast an SImode value into all four elements of a V4SI, AVX version.
;;   alt 0: value in an SSE register -> vpshufd with immediate 0
;;   alt 1: value in memory          -> vbroadcastss
;; NOTE(review): the predicate on operand 1 is register_operand although
;; alternative 1 has an "m" constraint, so a memory source can only appear
;; through reload.  *vec_dupv2di_avx uses nonimmediate_operand for the same
;; situation -- confirm whether the narrower predicate here is intended.
7566 (define_insn "*vec_dupv4si_avx"
7567 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7568 (vec_duplicate:V4SI
7569 (match_operand:SI 1 "register_operand" "x,m")))]
7570 "TARGET_AVX"
7571 "@
7572 vpshufd\t{$0, %1, %0|%0, %1, 0}
7573 vbroadcastss\t{%1, %0|%0, %1}"
7574 [(set_attr "type" "sselog1,ssemov")
7575 (set_attr "length_immediate" "1,0")
7576 (set_attr "prefix_extra" "0,1")
7577 (set_attr "prefix" "vex")
7578 (set_attr "mode" "TI,V4SF")])
7579
;; Broadcast an SImode value held in an SSE register into all four elements
;; of a V4SI, non-AVX version.
;;   alt 0 ("Y2" registers): pshufd with immediate 0
;;   alt 1 (source tied to destination): shufps of the register with itself
;; The first template carries the %v modifier but the insn sets no "prefix"
;; attribute; presumably TARGET_AVX code is caught by *vec_dupv4si_avx
;; before this pattern is tried.
7580 (define_insn "*vec_dupv4si"
7581 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7582 (vec_duplicate:V4SI
7583 (match_operand:SI 1 "register_operand" " Y2,0")))]
7584 "TARGET_SSE"
7585 "@
7586 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7587 shufps\t{$0, %0, %0|%0, %0, 0}"
7588 [(set_attr "type" "sselog1")
7589 (set_attr "length_immediate" "1")
7590 (set_attr "mode" "TI,V4SF")])
7591
;; Broadcast a DImode value into both elements of a V2DI, AVX version.
;;   alt 0: value in an SSE register -> vpunpcklqdq of the register with
;;          itself
;;   alt 1: value in memory          -> vmovddup
7592 (define_insn "*vec_dupv2di_avx"
7593 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7594 (vec_duplicate:V2DI
7595 (match_operand:DI 1 "nonimmediate_operand" " x,m")))]
7596 "TARGET_AVX"
7597 "@
7598 vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}
7599 vmovddup\t{%1, %0|%0, %1}"
7600 [(set_attr "type" "sselog1")
7601 (set_attr "prefix" "vex")
7602 (set_attr "mode" "TI,DF")])
7603
;; Broadcast a DImode value into both elements of a V2DI when SSE3 is
;; available.
;;   alt 0: value already in the destination register -> punpcklqdq %0, %0
;;   alt 1: value in memory                           -> movddup
7604 (define_insn "*vec_dupv2di_sse3"
7605 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7606 (vec_duplicate:V2DI
7607 (match_operand:DI 1 "nonimmediate_operand" " 0,m")))]
7608 "TARGET_SSE3"
7609 "@
7610 punpcklqdq\t%0, %0
7611 movddup\t{%1, %0|%0, %1}"
7612 [(set_attr "type" "sselog1")
7613 (set_attr "mode" "TI,DF")])
7614
;; Broadcast a DImode value into both elements of a V2DI, baseline SSE
;; version.  The source must already be in the destination register in both
;; alternatives.
;;   alt 0 ("Y2" registers): punpcklqdq %0, %0
;;   alt 1 (any SSE register): movlhps %0, %0
7615 (define_insn "*vec_dupv2di"
7616 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7617 (vec_duplicate:V2DI
7618 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7619 "TARGET_SSE"
7620 "@
7621 punpcklqdq\t%0, %0
7622 movlhps\t%0, %0"
7623 [(set_attr "type" "sselog1,ssemov")
7624 (set_attr "mode" "TI,V4SF")])
7625
;; Concatenate two SImode values (operand 1 = element 0, operand 2 =
;; element 1) into a V2SI, AVX version.
;;   alt 0: SSE reg + general reg/memory   -> vpinsrd, immediate 1
;;   alt 1: SSE reg + SSE reg              -> vpunpckldq
;;   alt 2: reg/memory + "C"               -> vmovd
;;   alt 3: MMX reg (tied) + MMX reg/mem   -> punpckldq
;;   alt 4: reg/memory + "C", MMX dest     -> movd
;; The two MMX alternatives (3 and 4) keep the legacy encoding, hence the
;; "orig" prefix selected for them; all other alternatives are "vex".
7626 (define_insn "*vec_concatv2si_avx"
7627 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7628 (vec_concat:V2SI
7629 (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
7630 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7631 "TARGET_AVX"
7632 "@
7633 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7634 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7635 vmovd\t{%1, %0|%0, %1}
7636 punpckldq\t{%2, %0|%0, %2}
7637 movd\t{%1, %0|%0, %1}"
7638 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7639 (set_attr "prefix_extra" "1,*,*,*,*")
7640 (set_attr "length_immediate" "1,*,*,*,*")
7641 (set (attr "prefix")
7642 (if_then_else (eq_attr "alternative" "3,4")
7643 (const_string "orig")
7644 (const_string "vex")))
7645 (set_attr "mode" "TI,TI,TI,DI,DI")])
7646
;; Concatenate two SImode values (operand 1 = element 0, operand 2 =
;; element 1) into a V2SI, SSE4.1 version.
;;   alt 0: dest-tied SSE reg + general reg/memory -> pinsrd, immediate 1
;;   alt 1: dest-tied SSE reg + SSE reg            -> punpckldq
;;   alt 2: reg/memory + "C"                       -> movd
;;   alt 3: dest-tied MMX reg + MMX reg/memory     -> punpckldq
;;   alt 4: reg/memory + "C", MMX dest             -> movd
7647 (define_insn "*vec_concatv2si_sse4_1"
7648 [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
7649 (vec_concat:V2SI
7650 (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
7651 (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
7652 "TARGET_SSE4_1"
7653 "@
7654 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
7655 punpckldq\t{%2, %0|%0, %2}
7656 movd\t{%1, %0|%0, %1}
7657 punpckldq\t{%2, %0|%0, %2}
7658 movd\t{%1, %0|%0, %1}"
7659 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7660 (set_attr "prefix_extra" "1,*,*,*,*")
7661 (set_attr "length_immediate" "1,*,*,*,*")
7662 (set_attr "mode" "TI,TI,TI,DI,DI")])
7663
7664 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7665 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7666 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Concatenate two SImode values (operand 1 = element 0, operand 2 =
;; element 1) into a V2SI, SSE2 version.  Operand 2 is limited to
;; reg_or_0_operand for the reason given above.
;;   alt 0: dest-tied SSE reg + SSE reg    -> punpckldq
;;   alt 1: reg/memory + "C"               -> movd
;;   alt 2: dest-tied MMX reg + MMX reg    -> punpckldq
;;   alt 3: reg/memory + "C", MMX dest     -> movd
7667 (define_insn "*vec_concatv2si_sse2"
7668 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7669 (vec_concat:V2SI
7670 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7671 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7672 "TARGET_SSE2"
7673 "@
7674 punpckldq\t{%2, %0|%0, %2}
7675 movd\t{%1, %0|%0, %1}
7676 punpckldq\t{%2, %0|%0, %2}
7677 movd\t{%1, %0|%0, %1}"
7678 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7679 (set_attr "mode" "TI,TI,DI,DI")])
7680
;; Concatenate two SImode values (operand 1 = element 0, operand 2 =
;; element 1) into a V2SI, baseline SSE version using single-precision
;; instructions for the SSE-register alternatives.
;;   alt 0: dest-tied SSE reg + SSE reg    -> unpcklps
;;   alt 1: memory + "C"                   -> movss
;;   alt 2: dest-tied MMX reg + MMX reg    -> punpckldq
;;   alt 3: general reg/memory + "C", MMX  -> movd
7681 (define_insn "*vec_concatv2si_sse"
7682 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7683 (vec_concat:V2SI
7684 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7685 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7686 "TARGET_SSE"
7687 "@
7688 unpcklps\t{%2, %0|%0, %2}
7689 movss\t{%1, %0|%0, %1}
7690 punpckldq\t{%2, %0|%0, %2}
7691 movd\t{%1, %0|%0, %1}"
7692 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7693 (set_attr "mode" "V4SF,V4SF,DI,DI")])
7694
;; Concatenate two V2SI values (operand 1 = low half, operand 2 = high
;; half) into a V4SI, AVX version.
;;   alt 0: both halves in SSE registers -> vpunpcklqdq
;;   alt 1: high half in memory          -> vmovhps
7695 (define_insn "*vec_concatv4si_1_avx"
7696 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7697 (vec_concat:V4SI
7698 (match_operand:V2SI 1 "register_operand" " x,x")
7699 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
7700 "TARGET_AVX"
7701 "@
7702 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7703 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7704 [(set_attr "type" "sselog,ssemov")
7705 (set_attr "prefix" "vex")
7706 (set_attr "mode" "TI,V2SF")])
7707
;; Concatenate two V2SI values (operand 1 = low half, operand 2 = high
;; half) into a V4SI, non-AVX version.  The low half must already be in the
;; destination register in every alternative.
;;   alt 0: high half in a "Y2" register -> punpcklqdq
;;   alt 1: high half in an SSE register -> movlhps
;;   alt 2: high half in memory          -> movhps
7708 (define_insn "*vec_concatv4si_1"
7709 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
7710 (vec_concat:V4SI
7711 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
7712 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
7713 "TARGET_SSE"
7714 "@
7715 punpcklqdq\t{%2, %0|%0, %2}
7716 movlhps\t{%2, %0|%0, %2}
7717 movhps\t{%2, %0|%0, %2}"
7718 [(set_attr "type" "sselog,ssemov,ssemov")
7719 (set_attr "mode" "TI,V4SF,V2SF")])
7720
;; Concatenate two DImode values (operand 1 = element 0, operand 2 =
;; element 1) into a V2DI, 32-bit AVX version.
;;   alt 0: memory + "C"             -> vmovq
;;   alt 1: MMX register + "C"       -> movq2dq (legacy encoding, prefix
;;                                      "orig")
;;   alt 2: SSE reg + SSE reg        -> vpunpcklqdq
;;   alt 3: SSE reg + memory         -> vmovhps
7721 (define_insn "*vec_concatv2di_avx"
7722 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
7723 (vec_concat:V2DI
7724 (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
7725 (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
7726 "!TARGET_64BIT && TARGET_AVX"
7727 "@
7728 vmovq\t{%1, %0|%0, %1}
7729 movq2dq\t{%1, %0|%0, %1}
7730 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7731 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7732 [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
7733 (set (attr "prefix")
7734 (if_then_else (eq_attr "alternative" "1")
7735 (const_string "orig")
7736 (const_string "vex")))
7737 (set_attr "mode" "TI,TI,TI,V2SF")])
7738
;; Concatenate two DImode values (operand 1 = element 0, operand 2 =
;; element 1) into a V2DI, 32-bit non-AVX version.  This pattern is named
;; (no leading '*'), so a gen_vec_concatv2di function is generated for it.
;;   alt 0: memory or "Y2" reg + "C"          -> movq
;;   alt 1: MMX register + "C"                -> movq2dq
;;   alt 2: dest-tied reg + "Y2" register     -> punpcklqdq
;;   alt 3: dest-tied reg + SSE register      -> movlhps
;;   alt 4: dest-tied reg + memory            -> movhps
7739 (define_insn "vec_concatv2di"
7740 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
7741 (vec_concat:V2DI
7742 (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
7743 (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
7744 "!TARGET_64BIT && TARGET_SSE"
7745 "@
7746 movq\t{%1, %0|%0, %1}
7747 movq2dq\t{%1, %0|%0, %1}
7748 punpcklqdq\t{%2, %0|%0, %2}
7749 movlhps\t{%2, %0|%0, %2}
7750 movhps\t{%2, %0|%0, %2}"
7751 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
7752 (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
7753
;; Concatenate two DImode values (operand 1 = element 0, operand 2 =
;; element 1) into a V2DI, 64-bit AVX version.
;;   alt 0: SSE reg + general reg/memory -> vpinsrq, immediate 1
;;   alt 1: memory + "C"                 -> vmovq
;;   alt 2: general reg + "C" ("Yi")     -> vmovq
;;   alt 3: MMX register + "C"           -> movq2dq (legacy encoding,
;;                                          prefix "orig")
;;   alt 4: SSE reg + SSE reg            -> vpunpcklqdq
;;   alt 5: SSE reg + memory             -> vmovhps
7754 (define_insn "*vec_concatv2di_rex64_avx"
7755 [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
7756 (vec_concat:V2DI
7757 (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
7758 (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
7759 "TARGET_64BIT && TARGET_AVX"
7760 "@
7761 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
7762 vmovq\t{%1, %0|%0, %1}
7763 vmovq\t{%1, %0|%0, %1}
7764 movq2dq\t{%1, %0|%0, %1}
7765 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7766 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7767 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7768 (set_attr "prefix_extra" "1,*,*,*,*,*")
7769 (set_attr "length_immediate" "1,*,*,*,*,*")
7770 (set (attr "prefix")
7771 (if_then_else (eq_attr "alternative" "3")
7772 (const_string "orig")
7773 (const_string "vex")))
7774 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
7775
;; Concatenate two DImode values (operand 1 = element 0, operand 2 =
;; element 1) into a V2DI, 64-bit SSE4.1 version.
;;   alt 0: dest-tied reg + general reg/memory -> pinsrq, immediate 1
;;   alt 1: memory or SSE reg + "C"            -> movq
;;   alt 2: general reg + "C" ("Yi")           -> movq
;;   alt 3: MMX register + "C"                 -> movq2dq
;;   alt 4: dest-tied reg + SSE reg            -> punpcklqdq
;;   alt 5: dest-tied reg + SSE reg            -> movlhps
;;   alt 6: dest-tied reg + memory             -> movhps
;; Alternatives 4 and 5 have identical constraints and differ only in the
;; instruction and "mode" attribute (TI vs V4SF).
7776 (define_insn "*vec_concatv2di_rex64_sse4_1"
7777 [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
7778 (vec_concat:V2DI
7779 (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
7780 (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
7781 "TARGET_64BIT && TARGET_SSE4_1"
7782 "@
7783 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
7784 movq\t{%1, %0|%0, %1}
7785 movq\t{%1, %0|%0, %1}
7786 movq2dq\t{%1, %0|%0, %1}
7787 punpcklqdq\t{%2, %0|%0, %2}
7788 movlhps\t{%2, %0|%0, %2}
7789 movhps\t{%2, %0|%0, %2}"
7790 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7791 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7792 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7793 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7794 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
7795
;; Concatenate two DImode values (operand 1 = element 0, operand 2 =
;; element 1) into a V2DI, 64-bit baseline SSE version.
;;   alt 0: memory or "Y2" reg + "C"       -> movq
;;   alt 1: general reg + "C" ("Yi")       -> movq (prefix_rex forced to 1)
;;   alt 2: MMX register + "C"             -> movq2dq
;;   alt 3: dest-tied reg + "Y2" register  -> punpcklqdq
;;   alt 4: dest-tied reg + SSE register   -> movlhps
;;   alt 5: dest-tied reg + memory         -> movhps
7796 (define_insn "*vec_concatv2di_rex64_sse"
7797 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7798 (vec_concat:V2DI
7799 (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
7800 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
7801 "TARGET_64BIT && TARGET_SSE"
7802 "@
7803 movq\t{%1, %0|%0, %1}
7804 movq\t{%1, %0|%0, %1}
7805 movq2dq\t{%1, %0|%0, %1}
7806 punpcklqdq\t{%2, %0|%0, %2}
7807 movlhps\t{%2, %0|%0, %2}
7808 movhps\t{%2, %0|%0, %2}"
7809 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7810 (set_attr "prefix_rex" "*,1,*,*,*,*")
7811 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
7812
;; Expander for the vec_unpacku_hi_v16qi standard name: V16QI -> V8HI.
;; All work is delegated to a C helper, the SSE4.1 variant when available,
;; called with (operands, true, true).  The two flags presumably mean
;; (unsigned_p, high_p), judging by how they vary across the sibling
;; vec_unpack{u,s}_{hi,lo} expanders -- confirm in i386.c.
7813 (define_expand "vec_unpacku_hi_v16qi"
7814 [(match_operand:V8HI 0 "register_operand" "")
7815 (match_operand:V16QI 1 "register_operand" "")]
7816 "TARGET_SSE2"
7817 {
7818 if (TARGET_SSE4_1)
7819 ix86_expand_sse4_unpack (operands, true, true);
7820 else
7821 ix86_expand_sse_unpack (operands, true, true);
7822 DONE;
7823 })
7824
;; Expander for the vec_unpacks_hi_v16qi standard name: V16QI -> V8HI.
;; Delegates to the SSE4.1 or baseline unpack helper with
;; (operands, false, true); the flags presumably mean (unsigned_p, high_p)
;; -- confirm in i386.c.
7825 (define_expand "vec_unpacks_hi_v16qi"
7826 [(match_operand:V8HI 0 "register_operand" "")
7827 (match_operand:V16QI 1 "register_operand" "")]
7828 "TARGET_SSE2"
7829 {
7830 if (TARGET_SSE4_1)
7831 ix86_expand_sse4_unpack (operands, false, true);
7832 else
7833 ix86_expand_sse_unpack (operands, false, true);
7834 DONE;
7835 })
7836
;; Expander for the vec_unpacku_lo_v16qi standard name: V16QI -> V8HI.
;; Delegates to the SSE4.1 or baseline unpack helper with
;; (operands, true, false); the flags presumably mean (unsigned_p, high_p)
;; -- confirm in i386.c.
7837 (define_expand "vec_unpacku_lo_v16qi"
7838 [(match_operand:V8HI 0 "register_operand" "")
7839 (match_operand:V16QI 1 "register_operand" "")]
7840 "TARGET_SSE2"
7841 {
7842 if (TARGET_SSE4_1)
7843 ix86_expand_sse4_unpack (operands, true, false);
7844 else
7845 ix86_expand_sse_unpack (operands, true, false);
7846 DONE;
7847 })
7848
;; Expander for the vec_unpacks_lo_v16qi standard name: V16QI -> V8HI.
;; Delegates to the SSE4.1 or baseline unpack helper with
;; (operands, false, false); the flags presumably mean (unsigned_p, high_p)
;; -- confirm in i386.c.
7849 (define_expand "vec_unpacks_lo_v16qi"
7850 [(match_operand:V8HI 0 "register_operand" "")
7851 (match_operand:V16QI 1 "register_operand" "")]
7852 "TARGET_SSE2"
7853 {
7854 if (TARGET_SSE4_1)
7855 ix86_expand_sse4_unpack (operands, false, false);
7856 else
7857 ix86_expand_sse_unpack (operands, false, false);
7858 DONE;
7859 })
7860
;; Expander for the vec_unpacku_hi_v8hi standard name: V8HI -> V4SI.
;; Delegates to the SSE4.1 or baseline unpack helper with
;; (operands, true, true); the flags presumably mean (unsigned_p, high_p)
;; -- confirm in i386.c.
7861 (define_expand "vec_unpacku_hi_v8hi"
7862 [(match_operand:V4SI 0 "register_operand" "")
7863 (match_operand:V8HI 1 "register_operand" "")]
7864 "TARGET_SSE2"
7865 {
7866 if (TARGET_SSE4_1)
7867 ix86_expand_sse4_unpack (operands, true, true);
7868 else
7869 ix86_expand_sse_unpack (operands, true, true);
7870 DONE;
7871 })
7872
;; Expander for the vec_unpacks_hi_v8hi standard name: V8HI -> V4SI.
;; Delegates to the SSE4.1 or baseline unpack helper with
;; (operands, false, true); the flags presumably mean (unsigned_p, high_p)
;; -- confirm in i386.c.
7873 (define_expand "vec_unpacks_hi_v8hi"
7874 [(match_operand:V4SI 0 "register_operand" "")
7875 (match_operand:V8HI 1 "register_operand" "")]
7876 "TARGET_SSE2"
7877 {
7878 if (TARGET_SSE4_1)
7879 ix86_expand_sse4_unpack (operands, false, true);
7880 else
7881 ix86_expand_sse_unpack (operands, false, true);
7882 DONE;
7883 })
7884
;; Expander for the vec_unpacku_lo_v8hi standard name: V8HI -> V4SI.
;; Delegates to the SSE4.1 or baseline unpack helper with
;; (operands, true, false); the flags presumably mean (unsigned_p, high_p)
;; -- confirm in i386.c.
7885 (define_expand "vec_unpacku_lo_v8hi"
7886 [(match_operand:V4SI 0 "register_operand" "")
7887 (match_operand:V8HI 1 "register_operand" "")]
7888 "TARGET_SSE2"
7889 {
7890 if (TARGET_SSE4_1)
7891 ix86_expand_sse4_unpack (operands, true, false);
7892 else
7893 ix86_expand_sse_unpack (operands, true, false);
7894 DONE;
7895 })
7896
;; Expander for the vec_unpacks_lo_v8hi standard name: V8HI -> V4SI.
;; Delegates to the SSE4.1 or baseline unpack helper with
;; (operands, false, false); the flags presumably mean (unsigned_p, high_p)
;; -- confirm in i386.c.
7897 (define_expand "vec_unpacks_lo_v8hi"
7898 [(match_operand:V4SI 0 "register_operand" "")
7899 (match_operand:V8HI 1 "register_operand" "")]
7900 "TARGET_SSE2"
7901 {
7902 if (TARGET_SSE4_1)
7903 ix86_expand_sse4_unpack (operands, false, false);
7904 else
7905 ix86_expand_sse_unpack (operands, false, false);
7906 DONE;
7907 })
7908
;; Expander for the vec_unpacku_hi_v4si standard name: V4SI -> V2DI.
;; Delegates to the SSE4.1 or baseline unpack helper with
;; (operands, true, true); the flags presumably mean (unsigned_p, high_p)
;; -- confirm in i386.c.
7909 (define_expand "vec_unpacku_hi_v4si"
7910 [(match_operand:V2DI 0 "register_operand" "")
7911 (match_operand:V4SI 1 "register_operand" "")]
7912 "TARGET_SSE2"
7913 {
7914 if (TARGET_SSE4_1)
7915 ix86_expand_sse4_unpack (operands, true, true);
7916 else
7917 ix86_expand_sse_unpack (operands, true, true);
7918 DONE;
7919 })
7920
;; Expander for the vec_unpacks_hi_v4si standard name: V4SI -> V2DI.
;; Delegates to the SSE4.1 or baseline unpack helper with
;; (operands, false, true); the flags presumably mean (unsigned_p, high_p)
;; -- confirm in i386.c.
7921 (define_expand "vec_unpacks_hi_v4si"
7922 [(match_operand:V2DI 0 "register_operand" "")
7923 (match_operand:V4SI 1 "register_operand" "")]
7924 "TARGET_SSE2"
7925 {
7926 if (TARGET_SSE4_1)
7927 ix86_expand_sse4_unpack (operands, false, true);
7928 else
7929 ix86_expand_sse_unpack (operands, false, true);
7930 DONE;
7931 })
7932
;; Expander for the vec_unpacku_lo_v4si standard name: V4SI -> V2DI.
;; Delegates to the SSE4.1 or baseline unpack helper with
;; (operands, true, false); the flags presumably mean (unsigned_p, high_p)
;; -- confirm in i386.c.
7933 (define_expand "vec_unpacku_lo_v4si"
7934 [(match_operand:V2DI 0 "register_operand" "")
7935 (match_operand:V4SI 1 "register_operand" "")]
7936 "TARGET_SSE2"
7937 {
7938 if (TARGET_SSE4_1)
7939 ix86_expand_sse4_unpack (operands, true, false);
7940 else
7941 ix86_expand_sse_unpack (operands, true, false);
7942 DONE;
7943 })
7944
;; Expander for the vec_unpacks_lo_v4si standard name: V4SI -> V2DI.
;; Delegates to the SSE4.1 or baseline unpack helper with
;; (operands, false, false); the flags presumably mean (unsigned_p, high_p)
;; -- confirm in i386.c.
7945 (define_expand "vec_unpacks_lo_v4si"
7946 [(match_operand:V2DI 0 "register_operand" "")
7947 (match_operand:V4SI 1 "register_operand" "")]
7948 "TARGET_SSE2"
7949 {
7950 if (TARGET_SSE4_1)
7951 ix86_expand_sse4_unpack (operands, false, false);
7952 else
7953 ix86_expand_sse_unpack (operands, false, false);
7954 DONE;
7955 })
7956
7957 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7958 ;;
7959 ;; Miscellaneous
7960 ;;
7961 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7962
;; Expander for the unsigned rounding byte average.  The RTL widens both
;; V16QI inputs to V16HI, adds them, adds a vector of ones, shifts right
;; logically by one and truncates back to V16QI, i.e. (a + b + 1) >> 1 per
;; element without intermediate overflow.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy to legitimize the operands for PLUS.
;; NOTE(review): the vector of ones is written as const_vector:V16QI inside
;; a plus:V16HI; the matching insn patterns spell it the same way.
7963 (define_expand "sse2_uavgv16qi3"
7964 [(set (match_operand:V16QI 0 "register_operand" "")
7965 (truncate:V16QI
7966 (lshiftrt:V16HI
7967 (plus:V16HI
7968 (plus:V16HI
7969 (zero_extend:V16HI
7970 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7971 (zero_extend:V16HI
7972 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7973 (const_vector:V16QI [(const_int 1) (const_int 1)
7974 (const_int 1) (const_int 1)
7975 (const_int 1) (const_int 1)
7976 (const_int 1) (const_int 1)
7977 (const_int 1) (const_int 1)
7978 (const_int 1) (const_int 1)
7979 (const_int 1) (const_int 1)
7980 (const_int 1) (const_int 1)]))
7981 (const_int 1))))]
7982 "TARGET_SSE2"
7983 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
7984
;; AVX insn for the unsigned rounding byte average, (a + b + 1) >> 1 per
;; element, emitted as the three-operand vpavgb.  Operand 1 is marked
;; commutative ("%"); operand 2 may be a register or memory.
7985 (define_insn "*avx_uavgv16qi3"
7986 [(set (match_operand:V16QI 0 "register_operand" "=x")
7987 (truncate:V16QI
7988 (lshiftrt:V16HI
7989 (plus:V16HI
7990 (plus:V16HI
7991 (zero_extend:V16HI
7992 (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
7993 (zero_extend:V16HI
7994 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
7995 (const_vector:V16QI [(const_int 1) (const_int 1)
7996 (const_int 1) (const_int 1)
7997 (const_int 1) (const_int 1)
7998 (const_int 1) (const_int 1)
7999 (const_int 1) (const_int 1)
8000 (const_int 1) (const_int 1)
8001 (const_int 1) (const_int 1)
8002 (const_int 1) (const_int 1)]))
8003 (const_int 1))))]
8004 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
8005 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
8006 [(set_attr "type" "sseiadd")
8007 (set_attr "prefix" "vex")
8008 (set_attr "mode" "TI")])
8009
;; SSE2 insn for the unsigned rounding byte average, (a + b + 1) >> 1 per
;; element, emitted as the two-operand pavgb.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be a register or
;; memory.
8010 (define_insn "*sse2_uavgv16qi3"
8011 [(set (match_operand:V16QI 0 "register_operand" "=x")
8012 (truncate:V16QI
8013 (lshiftrt:V16HI
8014 (plus:V16HI
8015 (plus:V16HI
8016 (zero_extend:V16HI
8017 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
8018 (zero_extend:V16HI
8019 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
8020 (const_vector:V16QI [(const_int 1) (const_int 1)
8021 (const_int 1) (const_int 1)
8022 (const_int 1) (const_int 1)
8023 (const_int 1) (const_int 1)
8024 (const_int 1) (const_int 1)
8025 (const_int 1) (const_int 1)
8026 (const_int 1) (const_int 1)
8027 (const_int 1) (const_int 1)]))
8028 (const_int 1))))]
8029 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
8030 "pavgb\t{%2, %0|%0, %2}"
8031 [(set_attr "type" "sseiadd")
8032 (set_attr "prefix_data16" "1")
8033 (set_attr "mode" "TI")])
8034
;; Expander for the unsigned rounding 16-bit average.  The RTL widens both
;; V8HI inputs to V8SI, adds them, adds a vector of ones, shifts right
;; logically by one and truncates back to V8HI, i.e. (a + b + 1) >> 1 per
;; element.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy to legitimize the operands for PLUS.
;; NOTE(review): the vector of ones is written as const_vector:V8HI inside a
;; plus:V8SI; the matching insn patterns spell it the same way.
8035 (define_expand "sse2_uavgv8hi3"
8036 [(set (match_operand:V8HI 0 "register_operand" "")
8037 (truncate:V8HI
8038 (lshiftrt:V8SI
8039 (plus:V8SI
8040 (plus:V8SI
8041 (zero_extend:V8SI
8042 (match_operand:V8HI 1 "nonimmediate_operand" ""))
8043 (zero_extend:V8SI
8044 (match_operand:V8HI 2 "nonimmediate_operand" "")))
8045 (const_vector:V8HI [(const_int 1) (const_int 1)
8046 (const_int 1) (const_int 1)
8047 (const_int 1) (const_int 1)
8048 (const_int 1) (const_int 1)]))
8049 (const_int 1))))]
8050 "TARGET_SSE2"
8051 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
8052
;; AVX insn for the unsigned rounding 16-bit average, (a + b + 1) >> 1 per
;; element, emitted as the three-operand vpavgw.  Operand 1 is marked
;; commutative ("%"); operand 2 may be a register or memory.
8053 (define_insn "*avx_uavgv8hi3"
8054 [(set (match_operand:V8HI 0 "register_operand" "=x")
8055 (truncate:V8HI
8056 (lshiftrt:V8SI
8057 (plus:V8SI
8058 (plus:V8SI
8059 (zero_extend:V8SI
8060 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
8061 (zero_extend:V8SI
8062 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8063 (const_vector:V8HI [(const_int 1) (const_int 1)
8064 (const_int 1) (const_int 1)
8065 (const_int 1) (const_int 1)
8066 (const_int 1) (const_int 1)]))
8067 (const_int 1))))]
8068 "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
8069 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
8070 [(set_attr "type" "sseiadd")
8071 (set_attr "prefix" "vex")
8072 (set_attr "mode" "TI")])
8073
;; SSE2 insn for the unsigned rounding 16-bit average, (a + b + 1) >> 1 per
;; element, emitted as the two-operand pavgw.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be a register or
;; memory.
8074 (define_insn "*sse2_uavgv8hi3"
8075 [(set (match_operand:V8HI 0 "register_operand" "=x")
8076 (truncate:V8HI
8077 (lshiftrt:V8SI
8078 (plus:V8SI
8079 (plus:V8SI
8080 (zero_extend:V8SI
8081 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
8082 (zero_extend:V8SI
8083 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
8084 (const_vector:V8HI [(const_int 1) (const_int 1)
8085 (const_int 1) (const_int 1)
8086 (const_int 1) (const_int 1)
8087 (const_int 1) (const_int 1)]))
8088 (const_int 1))))]
8089 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
8090 "pavgw\t{%2, %0|%0, %2}"
8091 [(set_attr "type" "sseiadd")
8092 (set_attr "prefix_data16" "1")
8093 (set_attr "mode" "TI")])
8094
8095 ;; The correct representation for this is absolutely enormous, and
8096 ;; surely not generally useful.
;; AVX form of psadbw.  The operation is kept opaque as an UNSPEC_PSADBW of
;; two V16QI inputs producing a V2DI result, for the reason given above;
;; emitted as the three-operand vpsadbw.
8097 (define_insn "*avx_psadbw"
8098 [(set (match_operand:V2DI 0 "register_operand" "=x")
8099 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
8100 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8101 UNSPEC_PSADBW))]
8102 "TARGET_AVX"
8103 "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
8104 [(set_attr "type" "sseiadd")
8105 (set_attr "prefix" "vex")
8106 (set_attr "mode" "TI")])
8107
;; SSE2 form of psadbw, modelled as an UNSPEC_PSADBW of two V16QI inputs
;; producing a V2DI result.  Operand 1 is tied to the destination; operand 2
;; may be a register or memory.  Unlike the AVX variant this one also sets
;; "atom_unit" to "simul".
8108 (define_insn "sse2_psadbw"
8109 [(set (match_operand:V2DI 0 "register_operand" "=x")
8110 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
8111 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8112 UNSPEC_PSADBW))]
8113 "TARGET_SSE2"
8114 "psadbw\t{%2, %0|%0, %2}"
8115 [(set_attr "type" "sseiadd")
8116 (set_attr "atom_unit" "simul")
8117 (set_attr "prefix_data16" "1")
8118 (set_attr "mode" "TI")])
8119
;; 256-bit movmsk: copy an UNSPEC_MOVMSK of a 256-bit floating-point vector
;; (iterated over AVX256MODEF2P) into an SImode general register.  The
;; mnemonic suffix comes from the <ssemodesuffix> mode attribute.
;; NOTE(review): "type" is "ssecvt" here but "ssemov" in the 128-bit
;; <sse>_movmsk<ssemodesuffix> pattern -- confirm the difference is
;; intentional.
8120 (define_insn "avx_movmsk<ssemodesuffix>256"
8121 [(set (match_operand:SI 0 "register_operand" "=r")
8122 (unspec:SI
8123 [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
8124 UNSPEC_MOVMSK))]
8125 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
8126 "vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8127 [(set_attr "type" "ssecvt")
8128 (set_attr "prefix" "vex")
8129 (set_attr "mode" "<MODE>")])
8130
;; 128-bit movmsk: copy an UNSPEC_MOVMSK of a 128-bit floating-point vector
;; (iterated over SSEMODEF2P) into an SImode general register.  The pattern
;; name prefix and the mnemonic suffix come from the <sse> and
;; <ssemodesuffix> mode attributes; %v allows the VEX form ("maybe_vex").
8131 (define_insn "<sse>_movmsk<ssemodesuffix>"
8132 [(set (match_operand:SI 0 "register_operand" "=r")
8133 (unspec:SI
8134 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
8135 UNSPEC_MOVMSK))]
8136 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
8137 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
8138 [(set_attr "type" "ssemov")
8139 (set_attr "prefix" "maybe_vex")
8140 (set_attr "mode" "<MODE>")])
8141
;; pmovmskb: copy an UNSPEC_MOVMSK of a V16QI register into an SImode
;; general register.  It shares the UNSPEC_MOVMSK code with the
;; floating-point movmsk patterns; the operand mode tells them apart.
8142 (define_insn "sse2_pmovmskb"
8143 [(set (match_operand:SI 0 "register_operand" "=r")
8144 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8145 UNSPEC_MOVMSK))]
8146 "TARGET_SSE2"
8147 "%vpmovmskb\t{%1, %0|%0, %1}"
8148 [(set_attr "type" "ssemov")
8149 (set_attr "prefix_data16" "1")
8150 (set_attr "prefix" "maybe_vex")
8151 (set_attr "mode" "SI")])
8152
;; Expander for maskmovdqu: a store to the V16QI memory operand 0 whose new
;; contents are an UNSPEC_MASKMOV of the data register (operand 1), the mask
;; register (operand 2) and the previous memory contents (match_dup 0).
;; There is no preparation code; the generated RTL is expected to match one
;; of the *sse2_maskmovdqu insn patterns.
8153 (define_expand "sse2_maskmovdqu"
8154 [(set (match_operand:V16QI 0 "memory_operand" "")
8155 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8156 (match_operand:V16QI 2 "register_operand" "")
8157 (match_dup 0)]
8158 UNSPEC_MASKMOV))]
8159 "TARGET_SSE2")
8160
;; 32-bit maskmovdqu.  The store address must be an SImode register in the
;; "D" register class; it does not appear in the output template, which
;; prints only the data (%1) and mask (%2) registers.  "length_vex" is set
;; explicitly to 3 for the reason given in the comment inside the attribute
;; list.
8161 (define_insn "*sse2_maskmovdqu"
8162 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
8163 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8164 (match_operand:V16QI 2 "register_operand" "x")
8165 (mem:V16QI (match_dup 0))]
8166 UNSPEC_MASKMOV))]
8167 "TARGET_SSE2 && !TARGET_64BIT"
8168 ;; @@@ check ordering of operands in intel/nonintel syntax
8169 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8170 [(set_attr "type" "ssemov")
8171 (set_attr "prefix_data16" "1")
8172 ;; The implicit %rdi operand confuses default length_vex computation.
8173 (set_attr "length_vex" "3")
8174 (set_attr "prefix" "maybe_vex")
8175 (set_attr "mode" "TI")])
8176
;; 64-bit maskmovdqu.  Same as the 32-bit pattern except that the address
;; register is DImode.  "length_vex" is computed from operand 2: 3 + 1 when
;; its register number is at or above FIRST_REX_SSE_REG, otherwise 2 + 1.
8177 (define_insn "*sse2_maskmovdqu_rex64"
8178 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
8179 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8180 (match_operand:V16QI 2 "register_operand" "x")
8181 (mem:V16QI (match_dup 0))]
8182 UNSPEC_MASKMOV))]
8183 "TARGET_SSE2 && TARGET_64BIT"
8184 ;; @@@ check ordering of operands in intel/nonintel syntax
8185 "%vmaskmovdqu\t{%2, %1|%1, %2}"
8186 [(set_attr "type" "ssemov")
8187 (set_attr "prefix_data16" "1")
8188 ;; The implicit %rdi operand confuses default length_vex computation.
8189 (set (attr "length_vex")
8190 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
8191 (set_attr "prefix" "maybe_vex")
8192 (set_attr "mode" "TI")])
8193
;; ldmxcsr: modelled as an unspec_volatile (UNSPECV_LDMXCSR) that reads the
;; SImode memory operand 0.  The "memory" attribute is "load".
8194 (define_insn "sse_ldmxcsr"
8195 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8196 UNSPECV_LDMXCSR)]
8197 "TARGET_SSE"
8198 "%vldmxcsr\t%0"
8199 [(set_attr "type" "sse")
8200 (set_attr "atom_sse_attr" "mxcsr")
8201 (set_attr "prefix" "maybe_vex")
8202 (set_attr "memory" "load")])
8203
;; Store MXCSR into the SImode memory operand.  The source is a volatile
;; unspec with a dummy (const_int 0) input.
(define_insn "sse_stmxcsr"
  [(set (match_operand:SI 0 "memory_operand" "=m")
        (unspec_volatile:SI
          [(const_int 0)]
          UNSPECV_STMXCSR))]
  "TARGET_SSE"
  "%vstmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "mxcsr")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "store")])
8213
;; Expander for SFENCE.  The fence is expressed as a volatile BLKmode MEM
;; at a scratch address that is both set and used by the unspec.
(define_expand "sse_sfence"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
{
  rtx fence_mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));

  MEM_VOLATILE_P (fence_mem) = 1;
  operands[0] = fence_mem;
})
8222
;; Matching insn for the SFENCE expander: any BLKmode operand that is
;; set from an UNSPEC_SFENCE of itself.
(define_insn "*sse_sfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
  "sfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])
8232
;; CLFLUSH on the address given by operand 0; printed with %a0 so the
;; operand is output as a memory address.
(define_insn "sse2_clflush"
  [(unspec_volatile
     [(match_operand 0 "address_operand" "p")]
     UNSPECV_CLFLUSH)]
  "TARGET_SSE2"
  "clflush\t%a0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])
8241
;; Expander for MFENCE.  The fence is expressed as a volatile BLKmode MEM
;; at a scratch address that is both set and used by the unspec.
(define_expand "sse2_mfence"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
{
  rtx fence_mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));

  MEM_VOLATILE_P (fence_mem) = 1;
  operands[0] = fence_mem;
})
8250
;; Matching insn for MFENCE.  Note the condition also accepts
;; TARGET_64BIT, which is wider than the named expander's TARGET_SSE2.
(define_insn "*sse2_mfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_64BIT || TARGET_SSE2"
  "mfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])
8260
;; Expander for LFENCE.  The fence is expressed as a volatile BLKmode MEM
;; at a scratch address that is both set and used by the unspec.
(define_expand "sse2_lfence"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
{
  rtx fence_mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));

  MEM_VOLATILE_P (fence_mem) = 1;
  operands[0] = fence_mem;
})
8269
;; Matching insn for the LFENCE expander.  Unlike the other fences it
;; uses the dedicated "lfence" value of atom_sse_attr.
(define_insn "*sse2_lfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
  "lfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "lfence")
   (set_attr "memory" "unknown")])
8279
;; MWAIT with its two inputs pinned to the "a" and "c" register classes.
;; 64bit version is "mwait %rax,%rcx".  But only lower 32bits are used.
;; Since 32bit register operands are implicitly zero extended to 64bit,
;; we only need to set up 32bit registers.
(define_insn "sse3_mwait"
  [(unspec_volatile
     [(match_operand:SI 0 "register_operand" "a")
      (match_operand:SI 1 "register_operand" "c")]
     UNSPECV_MWAIT)]
  "TARGET_SSE3"
  "mwait"
  [(set_attr "length" "3")])
8290
;; 32-bit MONITOR: three SImode inputs pinned to the "a", "c" and "d"
;; register classes, all printed explicitly in the output template.
(define_insn "sse3_monitor"
  [(unspec_volatile
     [(match_operand:SI 0 "register_operand" "a")
      (match_operand:SI 1 "register_operand" "c")
      (match_operand:SI 2 "register_operand" "d")]
     UNSPECV_MONITOR)]
  "TARGET_SSE3 && !TARGET_64BIT"
  "monitor\t%0, %1, %2"
  [(set_attr "length" "3")])
8299
;; 64-bit MONITOR: operand 0 is DImode, operands 1 and 2 stay SImode.
;; 64bit version is "monitor %rax,%rcx,%rdx".  But only lower 32bits in
;; RCX and RDX are used.  Since 32bit register operands are implicitly
;; zero extended to 64bit, we only need to set up 32bit registers.
(define_insn "sse3_monitor64"
  [(unspec_volatile
     [(match_operand:DI 0 "register_operand" "a")
      (match_operand:SI 1 "register_operand" "c")
      (match_operand:SI 2 "register_operand" "d")]
     UNSPECV_MONITOR)]
  "TARGET_SSE3 && TARGET_64BIT"
  "monitor"
  [(set_attr "length" "3")])
8311
8312 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8313 ;;
8314 ;; SSSE3 instructions
8315 ;;
8316 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8317
;; VEX three-operand horizontal add of HImode pairs (vphaddw).
(define_insn "*avx_phaddwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; First V4HI half: adjacent-pair sums of operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Second V4HI half: adjacent-pair sums of operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX"
  "vphaddw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8361
;; Two-operand horizontal add of HImode pairs (phaddw); operand 1 is
;; tied to the destination by the "0" constraint.
(define_insn "ssse3_phaddwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; First V4HI half: adjacent-pair sums of operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Second V4HI half: adjacent-pair sums of operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phaddw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8406
;; 64-bit (V4HI) horizontal add of HImode pairs using "y"-constrained
;; registers; operand 1 is tied to the destination.
(define_insn "ssse3_phaddwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; First V2HI half: adjacent-pair sums of operand 1.
          (vec_concat:V2HI
            (plus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (plus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Second V2HI half: adjacent-pair sums of operand 2.
          (vec_concat:V2HI
            (plus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (plus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8435
;; VEX three-operand horizontal add of SImode pairs (vphaddd).
(define_insn "*avx_phadddv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          ;; First V2SI half: adjacent-pair sums of operand 1.
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "x")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Second V2SI half: adjacent-pair sums of operand 2.
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_AVX"
  "vphaddd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8463
;; Two-operand horizontal add of SImode pairs (phaddd); operand 1 is
;; tied to the destination by the "0" constraint.
(define_insn "ssse3_phadddv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          ;; First V2SI half: adjacent-pair sums of operand 1.
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Second V2SI half: adjacent-pair sums of operand 2.
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8492
;; 64-bit (V2SI) horizontal add using "y"-constrained registers:
;; element 0 is the pair sum of operand 1, element 1 that of operand 2.
(define_insn "ssse3_phadddv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          (plus:SI
            (vec_select:SI
              (match_operand:V2SI 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          (plus:SI
            (vec_select:SI
              (match_operand:V2SI 2 "nonimmediate_operand" "ym")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phaddd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8513
;; VEX three-operand horizontal signed-saturating add of HImode pairs
;; (vphaddsw).
(define_insn "*avx_phaddswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; First V4HI half: saturating pair sums of operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Second V4HI half: saturating pair sums of operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX"
  "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8557
;; Two-operand horizontal signed-saturating add of HImode pairs
;; (phaddsw); operand 1 is tied to the destination.
(define_insn "ssse3_phaddswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; First V4HI half: saturating pair sums of operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Second V4HI half: saturating pair sums of operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phaddsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8602
;; 64-bit (V4HI) horizontal signed-saturating add of HImode pairs using
;; "y"-constrained registers; operand 1 is tied to the destination.
(define_insn "ssse3_phaddswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; First V2HI half: saturating pair sums of operand 1.
          (vec_concat:V2HI
            (ss_plus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ss_plus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Second V2HI half: saturating pair sums of operand 2.
          (vec_concat:V2HI
            (ss_plus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ss_plus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8631
;; VEX three-operand horizontal subtract of HImode pairs (vphsubw):
;; each result element is even-indexed element minus the following one.
(define_insn "*avx_phsubwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; First V4HI half: adjacent-pair differences of operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Second V4HI half: adjacent-pair differences of operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX"
  "vphsubw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8675
;; Two-operand horizontal subtract of HImode pairs (phsubw); operand 1
;; is tied to the destination by the "0" constraint.
(define_insn "ssse3_phsubwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; First V4HI half: adjacent-pair differences of operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Second V4HI half: adjacent-pair differences of operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phsubw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8720
;; 64-bit (V4HI) horizontal subtract of HImode pairs using
;; "y"-constrained registers; operand 1 is tied to the destination.
(define_insn "ssse3_phsubwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; First V2HI half: adjacent-pair differences of operand 1.
          (vec_concat:V2HI
            (minus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (minus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Second V2HI half: adjacent-pair differences of operand 2.
          (vec_concat:V2HI
            (minus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (minus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8749
;; VEX three-operand horizontal subtract of SImode pairs (vphsubd).
(define_insn "*avx_phsubdv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          ;; First V2SI half: adjacent-pair differences of operand 1.
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "x")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Second V2SI half: adjacent-pair differences of operand 2.
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_AVX"
  "vphsubd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8777
;; Two-operand horizontal subtract of SImode pairs (phsubd); operand 1
;; is tied to the destination by the "0" constraint.
(define_insn "ssse3_phsubdv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          ;; First V2SI half: adjacent-pair differences of operand 1.
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Second V2SI half: adjacent-pair differences of operand 2.
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8806
;; 64-bit (V2SI) horizontal subtract using "y"-constrained registers:
;; element 0 is op1[0] - op1[1], element 1 is op2[0] - op2[1].
(define_insn "ssse3_phsubdv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          (minus:SI
            (vec_select:SI
              (match_operand:V2SI 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          (minus:SI
            (vec_select:SI
              (match_operand:V2SI 2 "nonimmediate_operand" "ym")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phsubd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8827
;; VEX three-operand horizontal signed-saturating subtract of HImode
;; pairs (vphsubsw).
(define_insn "*avx_phsubswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; First V4HI half: saturating pair differences of operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Second V4HI half: saturating pair differences of operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX"
  "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8871
;; Two-operand horizontal signed-saturating subtract of HImode pairs
;; (phsubsw); operand 1 is tied to the destination.
(define_insn "ssse3_phsubswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; First V4HI half: saturating pair differences of operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Second V4HI half: saturating pair differences of operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phsubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8916
;; 64-bit (V4HI) horizontal signed-saturating subtract of HImode pairs
;; using "y"-constrained registers; operand 1 is tied to the destination.
(define_insn "ssse3_phsubswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; First V2HI half: saturating pair differences of operand 1.
          (vec_concat:V2HI
            (ss_minus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ss_minus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Second V2HI half: saturating pair differences of operand 2.
          (vec_concat:V2HI
            (ss_minus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ss_minus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8945
;; Multiply-add of unsigned (operand 1) by signed (operand 2) bytes with
;; signed saturation: the even-indexed byte products and the odd-indexed
;; byte products, each widened to HImode, are added with ss_plus.
;;
;; Every vec_select below picks eight QImode elements, so its mode must
;; be V8QI.  The two patterns are kept in step because the unnamed AVX
;; insn has to match the RTL generated from the named SSSE3 pattern.
(define_insn "*avx_pmaddubsw128"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (ss_plus:V8HI
          ;; Products of the even-indexed bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 1 "register_operand" "x")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)])))
            (sign_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)]))))
          ;; Products of the odd-indexed bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)])))
            (sign_extend:V8HI
              (vec_select:V8QI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)]))))))]
  "TARGET_AVX"
  "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])

;; Two-operand SSSE3 form; operand 1 is tied to the destination.
(define_insn "ssse3_pmaddubsw128"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (ss_plus:V8HI
          ;; Products of the even-indexed bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 1 "register_operand" "0")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)])))
            (sign_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)]))))
          ;; Products of the odd-indexed bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)])))
            (sign_extend:V8HI
              (vec_select:V8QI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
9054
;; 64-bit (V8QI -> V4HI) multiply-add of unsigned (operand 1) by signed
;; (operand 2) bytes with signed saturation, using "y"-constrained
;; registers.  Every vec_select picks four QImode elements, so its mode
;; must be V4QI.
(define_insn "ssse3_pmaddubsw"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (ss_plus:V4HI
          ;; Products of the even-indexed bytes.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 1 "register_operand" "0")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)])))
            (sign_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)]))))
          ;; Products of the odd-indexed bytes.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4HI
              (vec_select:V4QI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
9093
;; Expander for the rounding high multiply: per element,
;; truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1), with both
;; shifts being logical right shifts in V8SImode.  The preparation
;; statement fixes up the operands via
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
(define_expand "ssse3_pmulhrswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (lshiftrt:V8SI
                (mult:V8SI
                  (sign_extend:V8SI
                    (match_operand:V8HI 1 "nonimmediate_operand" ""))
                  (sign_extend:V8SI
                    (match_operand:V8HI 2 "nonimmediate_operand" "")))
                (const_int 14))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3"
{
  ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);
})
9113
;; AVX form of the 128-bit pmulhrsw pattern: three-operand vpmulhrsw with a
;; destination separate from both sources.  Operand 1 is marked commutative
;; ("%"); the insn condition also requires ix86_binary_operator_ok.
9114 (define_insn "*avx_pmulhrswv8hi3"
9115 [(set (match_operand:V8HI 0 "register_operand" "=x")
9116 (truncate:V8HI
9117 (lshiftrt:V8SI
9118 (plus:V8SI
9119 (lshiftrt:V8SI
9120 (mult:V8SI
9121 (sign_extend:V8SI
9122 (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
9123 (sign_extend:V8SI
9124 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9125 (const_int 14))
9126 (const_vector:V8HI [(const_int 1) (const_int 1)
9127 (const_int 1) (const_int 1)
9128 (const_int 1) (const_int 1)
9129 (const_int 1) (const_int 1)]))
9130 (const_int 1))))]
9131 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9132 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9133 [(set_attr "type" "sseimul")
9134 (set_attr "prefix_extra" "1")
9135 (set_attr "prefix" "vex")
9136 (set_attr "mode" "TI")])
9137
;; Legacy-encoded 128-bit pmulhrsw: same RTL as the AVX pattern, but the
;; two-operand template requires operand 1 to share the destination
;; register ("%0", commutative with operand 2).
9138 (define_insn "*ssse3_pmulhrswv8hi3"
9139 [(set (match_operand:V8HI 0 "register_operand" "=x")
9140 (truncate:V8HI
9141 (lshiftrt:V8SI
9142 (plus:V8SI
9143 (lshiftrt:V8SI
9144 (mult:V8SI
9145 (sign_extend:V8SI
9146 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
9147 (sign_extend:V8SI
9148 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
9149 (const_int 14))
9150 (const_vector:V8HI [(const_int 1) (const_int 1)
9151 (const_int 1) (const_int 1)
9152 (const_int 1) (const_int 1)
9153 (const_int 1) (const_int 1)]))
9154 (const_int 1))))]
9155 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9156 "pmulhrsw\t{%2, %0|%0, %2}"
9157 [(set_attr "type" "sseimul")
9158 (set_attr "prefix_data16" "1")
9159 (set_attr "prefix_extra" "1")
9160 (set_attr "mode" "TI")])
9161
;; Expander for the 64-bit (V4HI) pmulhrsw pattern.  Per element the RTL is
;; (((sext (op1) * sext (op2)) >> 14) + 1) >> 1, computed in V4SI and
;; truncated to V4HI.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy with MULT on the operands.
9162 (define_expand "ssse3_pmulhrswv4hi3"
9163 [(set (match_operand:V4HI 0 "register_operand" "")
9164 (truncate:V4HI
9165 (lshiftrt:V4SI
9166 (plus:V4SI
9167 (lshiftrt:V4SI
9168 (mult:V4SI
9169 (sign_extend:V4SI
9170 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9171 (sign_extend:V4SI
9172 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9173 (const_int 14))
9174 (const_vector:V4HI [(const_int 1) (const_int 1)
9175 (const_int 1) (const_int 1)]))
9176 (const_int 1))))]
9177 "TARGET_SSSE3"
9178 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
9179
;; 64-bit pmulhrsw on "y"-class registers.  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0"); the condition also
;; requires ix86_binary_operator_ok.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
9180 (define_insn "*ssse3_pmulhrswv4hi3"
9181 [(set (match_operand:V4HI 0 "register_operand" "=y")
9182 (truncate:V4HI
9183 (lshiftrt:V4SI
9184 (plus:V4SI
9185 (lshiftrt:V4SI
9186 (mult:V4SI
9187 (sign_extend:V4SI
9188 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9189 (sign_extend:V4SI
9190 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9191 (const_int 14))
9192 (const_vector:V4HI [(const_int 1) (const_int 1)
9193 (const_int 1) (const_int 1)]))
9194 (const_int 1))))]
9195 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9196 "pmulhrsw\t{%2, %0|%0, %2}"
9197 [(set_attr "type" "sseimul")
9198 (set_attr "prefix_extra" "1")
9199 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9200 (set_attr "mode" "DI")])
9201
;; AVX three-operand vpshufb on V16QI.  The operation is opaque to the RTL
;; optimizers: it is represented only as UNSPEC_PSHUFB of operands 1 and 2.
9202 (define_insn "*avx_pshufbv16qi3"
9203 [(set (match_operand:V16QI 0 "register_operand" "=x")
9204 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9205 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9206 UNSPEC_PSHUFB))]
9207 "TARGET_AVX"
9208 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
9209 [(set_attr "type" "sselog1")
9210 (set_attr "prefix_extra" "1")
9211 (set_attr "prefix" "vex")
9212 (set_attr "mode" "TI")])
9213
;; Legacy-encoded pshufb on V16QI (UNSPEC_PSHUFB).  Two-operand template:
;; operand 1 must occupy the destination register ("0").
9214 (define_insn "ssse3_pshufbv16qi3"
9215 [(set (match_operand:V16QI 0 "register_operand" "=x")
9216 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9217 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
9218 UNSPEC_PSHUFB))]
9219 "TARGET_SSSE3"
9220 "pshufb\t{%2, %0|%0, %2}";
9221 [(set_attr "type" "sselog1")
9222 (set_attr "prefix_data16" "1")
9223 (set_attr "prefix_extra" "1")
9224 (set_attr "mode" "TI")])
9225
;; 64-bit (V8QI, "y"-class registers) pshufb, represented as UNSPEC_PSHUFB.
;; Operand 1 is tied to the destination; prefix_rex is computed per insn.
9226 (define_insn "ssse3_pshufbv8qi3"
9227 [(set (match_operand:V8QI 0 "register_operand" "=y")
9228 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9229 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9230 UNSPEC_PSHUFB))]
9231 "TARGET_SSSE3"
9232 "pshufb\t{%2, %0|%0, %2}";
9233 [(set_attr "type" "sselog1")
9234 (set_attr "prefix_extra" "1")
9235 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9236 (set_attr "mode" "DI")])
9237
;; AVX three-operand vpsign, iterated over SSEMODE124 and represented as
;; UNSPEC_PSIGN.  The mnemonic suffix comes from <ssevecsize>.
9238 (define_insn "*avx_psign<mode>3"
9239 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9240 (unspec:SSEMODE124
9241 [(match_operand:SSEMODE124 1 "register_operand" "x")
9242 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9243 UNSPEC_PSIGN))]
9244 "TARGET_AVX"
9245 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
9246 [(set_attr "type" "sselog1")
9247 (set_attr "prefix_extra" "1")
9248 (set_attr "prefix" "vex")
9249 (set_attr "mode" "TI")])
9250
;; Legacy-encoded psign on "x"-class registers, iterated over SSEMODE124
;; (UNSPEC_PSIGN).  Operand 1 is tied to the destination ("0").
9251 (define_insn "ssse3_psign<mode>3"
9252 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9253 (unspec:SSEMODE124
9254 [(match_operand:SSEMODE124 1 "register_operand" "0")
9255 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
9256 UNSPEC_PSIGN))]
9257 "TARGET_SSSE3"
9258 "psign<ssevecsize>\t{%2, %0|%0, %2}";
9259 [(set_attr "type" "sselog1")
9260 (set_attr "prefix_data16" "1")
9261 (set_attr "prefix_extra" "1")
9262 (set_attr "mode" "TI")])
9263
;; psign on "y"-class registers, iterated over MMXMODEI (UNSPEC_PSIGN).
;; Shares its name template with the SSEMODE124 pattern; the mode iterator
;; makes the generated names distinct.  The suffix comes from <mmxvecsize>.
9264 (define_insn "ssse3_psign<mode>3"
9265 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9266 (unspec:MMXMODEI
9267 [(match_operand:MMXMODEI 1 "register_operand" "0")
9268 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9269 UNSPEC_PSIGN))]
9270 "TARGET_SSSE3"
9271 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9272 [(set_attr "type" "sselog1")
9273 (set_attr "prefix_extra" "1")
9274 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9275 (set_attr "mode" "DI")])
9276
;; AVX three-operand vpalignr on TImode (UNSPEC_PALIGNR).  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing, so the RTL value is presumably a bit count while the
;; assembler immediate is a byte count.
9277 (define_insn "*avx_palignrti"
9278 [(set (match_operand:TI 0 "register_operand" "=x")
9279 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
9280 (match_operand:TI 2 "nonimmediate_operand" "xm")
9281 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9282 UNSPEC_PALIGNR))]
9283 "TARGET_AVX"
9284 {
9285 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9286 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9287 }
9288 [(set_attr "type" "sseishft")
9289 (set_attr "prefix_extra" "1")
9290 (set_attr "length_immediate" "1")
9291 (set_attr "prefix" "vex")
9292 (set_attr "mode" "TI")])
9293
;; Legacy-encoded palignr on TImode (UNSPEC_PALIGNR); operand 1 is tied to
;; the destination.  Operand 3 (const_0_to_255_mul_8_operand) is divided by
;; 8 in the output code before being printed as the immediate.
9294 (define_insn "ssse3_palignrti"
9295 [(set (match_operand:TI 0 "register_operand" "=x")
9296 (unspec:TI [(match_operand:TI 1 "register_operand" "0")
9297 (match_operand:TI 2 "nonimmediate_operand" "xm")
9298 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9299 UNSPEC_PALIGNR))]
9300 "TARGET_SSSE3"
9301 {
9302 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9303 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9304 }
9305 [(set_attr "type" "sseishft")
9306 (set_attr "atom_unit" "sishuf")
9307 (set_attr "prefix_data16" "1")
9308 (set_attr "prefix_extra" "1")
9309 (set_attr "length_immediate" "1")
9310 (set_attr "mode" "TI")])
9311
;; 64-bit (DImode, "y"-class registers) palignr (UNSPEC_PALIGNR); operand 1
;; is tied to the destination.  As in the TImode patterns, operand 3 is
;; divided by 8 in the output code before it is printed.
9312 (define_insn "ssse3_palignrdi"
9313 [(set (match_operand:DI 0 "register_operand" "=y")
9314 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9315 (match_operand:DI 2 "nonimmediate_operand" "ym")
9316 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9317 UNSPEC_PALIGNR))]
9318 "TARGET_SSSE3"
9319 {
9320 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9321 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9322 }
9323 [(set_attr "type" "sseishft")
9324 (set_attr "atom_unit" "sishuf")
9325 (set_attr "prefix_extra" "1")
9326 (set_attr "length_immediate" "1")
9327 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9328 (set_attr "mode" "DI")])
9329
;; Vector absolute value on "x"-class registers, iterated over SSEMODE124
;; and expressed with the generic abs RTL code.  One pattern covers both
;; encodings: the template starts with %v and the prefix attribute is
;; maybe_vex (presumably printing the v-prefixed mnemonic when AVX is on).
9330 (define_insn "abs<mode>2"
9331 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
9332 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
9333 "TARGET_SSSE3"
9334 "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
9335 [(set_attr "type" "sselog1")
9336 (set_attr "prefix_data16" "1")
9337 (set_attr "prefix_extra" "1")
9338 (set_attr "prefix" "maybe_vex")
9339 (set_attr "mode" "TI")])
9340
;; Vector absolute value on "y"-class registers, iterated over MMXMODEI.
;; prefix_rep is forced to 0 and prefix_rex is computed per insn.
9341 (define_insn "abs<mode>2"
9342 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9343 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9344 "TARGET_SSSE3"
9345 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9346 [(set_attr "type" "sselog1")
9347 (set_attr "prefix_rep" "0")
9348 (set_attr "prefix_extra" "1")
9349 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9350 (set_attr "mode" "DI")])
9351
9352 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9353 ;;
9354 ;; AMD SSE4A instructions
9355 ;;
9356 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9357
;; SSE4A movnts* store of a scalar (iterated over MODEF): register
;; operand 1 is written to memory operand 0 through UNSPEC_MOVNT.
9358 (define_insn "sse4a_movnt<mode>"
9359 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9360 (unspec:MODEF
9361 [(match_operand:MODEF 1 "register_operand" "x")]
9362 UNSPEC_MOVNT))]
9363 "TARGET_SSE4A"
9364 "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
9365 [(set_attr "type" "ssemov")
9366 (set_attr "mode" "<MODE>")])
9367
;; SSE4A movnt* store taking a vector register (iterated over SSEMODEF2P):
;; element 0 of operand 1 is selected and stored to scalar memory
;; operand 0 through UNSPEC_MOVNT.
9368 (define_insn "sse4a_vmmovnt<mode>"
9369 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9370 (unspec:<ssescalarmode>
9371 [(vec_select:<ssescalarmode>
9372 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9373 (parallel [(const_int 0)]))]
9374 UNSPEC_MOVNT))]
9375 "TARGET_SSE4A"
9376 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9377 [(set_attr "type" "ssemov")
9378 (set_attr "mode" "<ssescalarmode>")])
9379
;; SSE4A extrq, immediate form (UNSPEC_EXTRQI).  Operand 1 is tied to the
;; destination; operands 2 and 3 are modeless const_ints printed as two
;; immediates, which is why length_immediate is 2.
9380 (define_insn "sse4a_extrqi"
9381 [(set (match_operand:V2DI 0 "register_operand" "=x")
9382 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9383 (match_operand 2 "const_int_operand" "")
9384 (match_operand 3 "const_int_operand" "")]
9385 UNSPEC_EXTRQI))]
9386 "TARGET_SSE4A"
9387 "extrq\t{%3, %2, %0|%0, %2, %3}"
9388 [(set_attr "type" "sse")
9389 (set_attr "prefix_data16" "1")
9390 (set_attr "length_immediate" "2")
9391 (set_attr "mode" "TI")])
9392
;; SSE4A extrq, register form (UNSPEC_EXTRQ).  Operand 1 is tied to the
;; destination; operand 2 is a V16QI register.
9393 (define_insn "sse4a_extrq"
9394 [(set (match_operand:V2DI 0 "register_operand" "=x")
9395 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9396 (match_operand:V16QI 2 "register_operand" "x")]
9397 UNSPEC_EXTRQ))]
9398 "TARGET_SSE4A"
9399 "extrq\t{%2, %0|%0, %2}"
9400 [(set_attr "type" "sse")
9401 (set_attr "prefix_data16" "1")
9402 (set_attr "mode" "TI")])
9403
;; SSE4A insertq, immediate form (UNSPEC_INSERTQI).  Operand 1 is tied to
;; the destination, operand 2 is a second register, and operands 3 and 4
;; are const_ints printed as two immediates (length_immediate 2).
;; prefix_data16 is forced to 0 and prefix_rep to 1.
9404 (define_insn "sse4a_insertqi"
9405 [(set (match_operand:V2DI 0 "register_operand" "=x")
9406 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9407 (match_operand:V2DI 2 "register_operand" "x")
9408 (match_operand 3 "const_int_operand" "")
9409 (match_operand 4 "const_int_operand" "")]
9410 UNSPEC_INSERTQI))]
9411 "TARGET_SSE4A"
9412 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9413 [(set_attr "type" "sseins")
9414 (set_attr "prefix_data16" "0")
9415 (set_attr "prefix_rep" "1")
9416 (set_attr "length_immediate" "2")
9417 (set_attr "mode" "TI")])
9418
;; SSE4A insertq, register form (UNSPEC_INSERTQ).  Operand 1 is tied to the
;; destination; prefix_data16 is forced to 0 and prefix_rep to 1.
9419 (define_insn "sse4a_insertq"
9420 [(set (match_operand:V2DI 0 "register_operand" "=x")
9421 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9422 (match_operand:V2DI 2 "register_operand" "x")]
9423 UNSPEC_INSERTQ))]
9424 "TARGET_SSE4A"
9425 "insertq\t{%2, %0|%0, %2}"
9426 [(set_attr "type" "sseins")
9427 (set_attr "prefix_data16" "0")
9428 (set_attr "prefix_rep" "1")
9429 (set_attr "mode" "TI")])
9430
9431 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9432 ;;
9433 ;; Intel SSE4.1 instructions
9434 ;;
9435 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9436
;; AVX immediate blend, iterated over AVXMODEF2P and expressed as a
;; vec_merge: the mask in operand 3 chooses between operand 2 (first
;; vec_merge arm) and operand 1 (second arm).  The operand 3 predicate name
;; is built from the <blendbits> mode attribute.
9437 (define_insn "avx_blend<ssemodesuffix><avxmodesuffix>"
9438 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9439 (vec_merge:AVXMODEF2P
9440 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9441 (match_operand:AVXMODEF2P 1 "register_operand" "x")
9442 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9443 "TARGET_AVX"
9444 "vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9445 [(set_attr "type" "ssemov")
9446 (set_attr "prefix_extra" "1")
9447 (set_attr "length_immediate" "1")
9448 (set_attr "prefix" "vex")
9449 (set_attr "mode" "<avxvecmode>")])
9450
;; AVX variable blend, iterated over AVXMODEF2P (UNSPEC_BLENDV).  Unlike
;; the legacy pattern, the selector in operand 3 may be any "x" register.
;; NOTE(review): length_immediate is 1 although no immediate is printed;
;; presumably the extra byte carries the fourth register -- confirm.
9451 (define_insn "avx_blendv<ssemodesuffix><avxmodesuffix>"
9452 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9453 (unspec:AVXMODEF2P
9454 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
9455 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9456 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
9457 UNSPEC_BLENDV))]
9458 "TARGET_AVX"
9459 "vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9460 [(set_attr "type" "ssemov")
9461 (set_attr "prefix_extra" "1")
9462 (set_attr "length_immediate" "1")
9463 (set_attr "prefix" "vex")
9464 (set_attr "mode" "<avxvecmode>")])
9465
;; Legacy-encoded SSE4.1 immediate blend, iterated over SSEMODEF2P and
;; expressed as a vec_merge of operand 2 and operand 1 under the mask in
;; operand 3.  Operand 1 is tied to the destination ("0").
9466 (define_insn "sse4_1_blend<ssemodesuffix>"
9467 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9468 (vec_merge:SSEMODEF2P
9469 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9470 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9471 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
9472 "TARGET_SSE4_1"
9473 "blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9474 [(set_attr "type" "ssemov")
9475 (set_attr "prefix_data16" "1")
9476 (set_attr "prefix_extra" "1")
9477 (set_attr "length_immediate" "1")
9478 (set_attr "mode" "<MODE>")])
9479
;; Legacy-encoded SSE4.1 variable blend, iterated over SSEMODEF2P
;; (UNSPEC_BLENDV).  The selector (operand 3) uses constraint "Yz", while
;; operands 0-2 use the *_not_xmm0 predicates -- presumably because the
;; instruction reads its selector implicitly from xmm0; confirm against the
;; constraint definitions.  Operand 1 is tied to the destination.
9480 (define_insn "sse4_1_blendv<ssemodesuffix>"
9481 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
9482 (unspec:SSEMODEF2P
9483 [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
9484 (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
9485 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
9486 UNSPEC_BLENDV))]
9487 "TARGET_SSE4_1"
9488 "blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9489 [(set_attr "type" "ssemov")
9490 (set_attr "prefix_data16" "1")
9491 (set_attr "prefix_extra" "1")
9492 (set_attr "mode" "<MODE>")])
9493
;; AVX vdp* with an 8-bit immediate, iterated over AVXMODEF2P (UNSPEC_DP).
;; Operands 1 and 2 are declared commutative ("%x").
9494 (define_insn "avx_dp<ssemodesuffix><avxmodesuffix>"
9495 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
9496 (unspec:AVXMODEF2P
9497 [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
9498 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
9499 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9500 UNSPEC_DP))]
9501 "TARGET_AVX"
9502 "vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9503 [(set_attr "type" "ssemul")
9504 (set_attr "prefix" "vex")
9505 (set_attr "prefix_extra" "1")
9506 (set_attr "length_immediate" "1")
9507 (set_attr "mode" "<avxvecmode>")])
9508
;; Legacy-encoded SSE4.1 dp* with an 8-bit immediate, iterated over
;; SSEMODEF2P (UNSPEC_DP).  Operand 1 is tied to the destination and
;; commutative with operand 2 ("%0").
9509 (define_insn "sse4_1_dp<ssemodesuffix>"
9510 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9511 (unspec:SSEMODEF2P
9512 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
9513 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
9514 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9515 UNSPEC_DP))]
9516 "TARGET_SSE4_1"
9517 "dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9518 [(set_attr "type" "ssemul")
9519 (set_attr "prefix_data16" "1")
9520 (set_attr "prefix_extra" "1")
9521 (set_attr "length_immediate" "1")
9522 (set_attr "mode" "<MODE>")])
9523
;; SSE4.1 movntdqa: loads V2DI memory operand 1 into register operand 0
;; through UNSPEC_MOVNTDQA.  One pattern serves both encodings (%v template
;; prefix, prefix attribute maybe_vex).
9524 (define_insn "sse4_1_movntdqa"
9525 [(set (match_operand:V2DI 0 "register_operand" "=x")
9526 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
9527 UNSPEC_MOVNTDQA))]
9528 "TARGET_SSE4_1"
9529 "%vmovntdqa\t{%1, %0|%0, %1}"
9530 [(set_attr "type" "ssemov")
9531 (set_attr "prefix_extra" "1")
9532 (set_attr "prefix" "maybe_vex")
9533 (set_attr "mode" "TI")])
9534
;; AVX three-operand vmpsadbw on V16QI with an 8-bit immediate
;; (UNSPEC_MPSADBW).
9535 (define_insn "*avx_mpsadbw"
9536 [(set (match_operand:V16QI 0 "register_operand" "=x")
9537 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9538 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9539 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9540 UNSPEC_MPSADBW))]
9541 "TARGET_AVX"
9542 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9543 [(set_attr "type" "sselog1")
9544 (set_attr "prefix" "vex")
9545 (set_attr "prefix_extra" "1")
9546 (set_attr "length_immediate" "1")
9547 (set_attr "mode" "TI")])
9548
;; Legacy-encoded SSE4.1 mpsadbw on V16QI with an 8-bit immediate
;; (UNSPEC_MPSADBW).  Operand 1 is tied to the destination.
9549 (define_insn "sse4_1_mpsadbw"
9550 [(set (match_operand:V16QI 0 "register_operand" "=x")
9551 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
9552 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9553 (match_operand:SI 3 "const_0_to_255_operand" "n")]
9554 UNSPEC_MPSADBW))]
9555 "TARGET_SSE4_1"
9556 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
9557 [(set_attr "type" "sselog1")
9558 (set_attr "prefix_extra" "1")
9559 (set_attr "length_immediate" "1")
9560 (set_attr "mode" "TI")])
9561
;; AVX three-operand vpackusdw: each V4SI source is narrowed to V4HI with
;; unsigned saturation (us_truncate) and the two halves are concatenated,
;; operand 1 first, into the V8HI result.
9562 (define_insn "*avx_packusdw"
9563 [(set (match_operand:V8HI 0 "register_operand" "=x")
9564 (vec_concat:V8HI
9565 (us_truncate:V4HI
9566 (match_operand:V4SI 1 "register_operand" "x"))
9567 (us_truncate:V4HI
9568 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9569 "TARGET_AVX"
9570 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9571 [(set_attr "type" "sselog")
9572 (set_attr "prefix_extra" "1")
9573 (set_attr "prefix" "vex")
9574 (set_attr "mode" "TI")])
9575
;; Legacy-encoded SSE4.1 packusdw: vec_concat of the unsigned-saturating
;; truncations of operand 1 (tied to the destination) and operand 2.
9576 (define_insn "sse4_1_packusdw"
9577 [(set (match_operand:V8HI 0 "register_operand" "=x")
9578 (vec_concat:V8HI
9579 (us_truncate:V4HI
9580 (match_operand:V4SI 1 "register_operand" "0"))
9581 (us_truncate:V4HI
9582 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9583 "TARGET_SSE4_1"
9584 "packusdw\t{%2, %0|%0, %2}"
9585 [(set_attr "type" "sselog")
9586 (set_attr "prefix_extra" "1")
9587 (set_attr "mode" "TI")])
9588
;; AVX vpblendvb on V16QI (UNSPEC_BLENDV): four-operand form in which the
;; selector (operand 3) may be any "x" register.
9589 (define_insn "*avx_pblendvb"
9590 [(set (match_operand:V16QI 0 "register_operand" "=x")
9591 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
9592 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
9593 (match_operand:V16QI 3 "register_operand" "x")]
9594 UNSPEC_BLENDV))]
9595 "TARGET_AVX"
9596 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9597 [(set_attr "type" "ssemov")
9598 (set_attr "prefix_extra" "1")
9599 (set_attr "length_immediate" "1")
9600 (set_attr "prefix" "vex")
9601 (set_attr "mode" "TI")])
9602
;; Legacy-encoded SSE4.1 pblendvb on V16QI (UNSPEC_BLENDV).  The selector
;; (operand 3) uses constraint "Yz" and the other operands use the
;; *_not_xmm0 predicates (presumably the selector is fixed to xmm0 --
;; confirm).  Operand 1 is tied to the destination.
9603 (define_insn "sse4_1_pblendvb"
9604 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9605 (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
9606 (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
9607 (match_operand:V16QI 3 "register_operand" "Yz")]
9608 UNSPEC_BLENDV))]
9609 "TARGET_SSE4_1"
9610 "pblendvb\t{%3, %2, %0|%0, %2, %3}"
9611 [(set_attr "type" "ssemov")
9612 (set_attr "prefix_extra" "1")
9613 (set_attr "mode" "TI")])
9614
;; AVX three-operand vpblendw on V8HI, expressed as a vec_merge of
;; operand 2 and operand 1 under the 8-bit immediate mask in operand 3.
9615 (define_insn "*avx_pblendw"
9616 [(set (match_operand:V8HI 0 "register_operand" "=x")
9617 (vec_merge:V8HI
9618 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9619 (match_operand:V8HI 1 "register_operand" "x")
9620 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9621 "TARGET_AVX"
9622 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9623 [(set_attr "type" "ssemov")
9624 (set_attr "prefix" "vex")
9625 (set_attr "prefix_extra" "1")
9626 (set_attr "length_immediate" "1")
9627 (set_attr "mode" "TI")])
9628
;; Legacy-encoded SSE4.1 pblendw on V8HI: vec_merge of operand 2 and
;; operand 1 (tied to the destination) under the immediate mask in
;; operand 3.
9629 (define_insn "sse4_1_pblendw"
9630 [(set (match_operand:V8HI 0 "register_operand" "=x")
9631 (vec_merge:V8HI
9632 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
9633 (match_operand:V8HI 1 "register_operand" "0")
9634 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9635 "TARGET_SSE4_1"
9636 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9637 [(set_attr "type" "ssemov")
9638 (set_attr "prefix_extra" "1")
9639 (set_attr "length_immediate" "1")
9640 (set_attr "mode" "TI")])
9641
;; SSE4.1 phminposuw on V8HI (UNSPEC_PHMINPOSUW), single source operand.
;; One pattern serves both encodings (%v template prefix, maybe_vex).
9642 (define_insn "sse4_1_phminposuw"
9643 [(set (match_operand:V8HI 0 "register_operand" "=x")
9644 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9645 UNSPEC_PHMINPOSUW))]
9646 "TARGET_SSE4_1"
9647 "%vphminposuw\t{%1, %0|%0, %1}"
9648 [(set_attr "type" "sselog1")
9649 (set_attr "prefix_extra" "1")
9650 (set_attr "prefix" "maybe_vex")
9651 (set_attr "mode" "TI")])
9652
;; SSE4.1 pmovsxbw / pmovzxbw: sign- or zero-extends (any_extend, suffix
;; from <extsuffix>) bytes 0-7 of V16QI operand 1 into a V8HI result.
9653 (define_insn "sse4_1_<code>v8qiv8hi2"
9654 [(set (match_operand:V8HI 0 "register_operand" "=x")
9655 (any_extend:V8HI
9656 (vec_select:V8QI
9657 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9658 (parallel [(const_int 0)
9659 (const_int 1)
9660 (const_int 2)
9661 (const_int 3)
9662 (const_int 4)
9663 (const_int 5)
9664 (const_int 6)
9665 (const_int 7)]))))]
9666 "TARGET_SSE4_1"
9667 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9668 [(set_attr "type" "ssemov")
9669 (set_attr "prefix_extra" "1")
9670 (set_attr "prefix" "maybe_vex")
9671 (set_attr "mode" "TI")])
9672
;; SSE4.1 pmovsxbd / pmovzxbd: extends bytes 0-3 of V16QI operand 1 into a
;; V4SI result.
9673 (define_insn "sse4_1_<code>v4qiv4si2"
9674 [(set (match_operand:V4SI 0 "register_operand" "=x")
9675 (any_extend:V4SI
9676 (vec_select:V4QI
9677 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9678 (parallel [(const_int 0)
9679 (const_int 1)
9680 (const_int 2)
9681 (const_int 3)]))))]
9682 "TARGET_SSE4_1"
9683 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
9684 [(set_attr "type" "ssemov")
9685 (set_attr "prefix_extra" "1")
9686 (set_attr "prefix" "maybe_vex")
9687 (set_attr "mode" "TI")])
9688
;; SSE4.1 pmovsxwd / pmovzxwd: extends halfwords 0-3 of V8HI operand 1
;; into a V4SI result.
9689 (define_insn "sse4_1_<code>v4hiv4si2"
9690 [(set (match_operand:V4SI 0 "register_operand" "=x")
9691 (any_extend:V4SI
9692 (vec_select:V4HI
9693 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9694 (parallel [(const_int 0)
9695 (const_int 1)
9696 (const_int 2)
9697 (const_int 3)]))))]
9698 "TARGET_SSE4_1"
9699 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9700 [(set_attr "type" "ssemov")
9701 (set_attr "prefix_extra" "1")
9702 (set_attr "prefix" "maybe_vex")
9703 (set_attr "mode" "TI")])
9704
;; SSE4.1 pmovsxbq / pmovzxbq: extends bytes 0-1 of V16QI operand 1 into a
;; V2DI result.
9705 (define_insn "sse4_1_<code>v2qiv2di2"
9706 [(set (match_operand:V2DI 0 "register_operand" "=x")
9707 (any_extend:V2DI
9708 (vec_select:V2QI
9709 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9710 (parallel [(const_int 0)
9711 (const_int 1)]))))]
9712 "TARGET_SSE4_1"
9713 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
9714 [(set_attr "type" "ssemov")
9715 (set_attr "prefix_extra" "1")
9716 (set_attr "prefix" "maybe_vex")
9717 (set_attr "mode" "TI")])
9718
;; SSE4.1 pmovsxwq / pmovzxwq: extends halfwords 0-1 of V8HI operand 1
;; into a V2DI result.
9719 (define_insn "sse4_1_<code>v2hiv2di2"
9720 [(set (match_operand:V2DI 0 "register_operand" "=x")
9721 (any_extend:V2DI
9722 (vec_select:V2HI
9723 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9724 (parallel [(const_int 0)
9725 (const_int 1)]))))]
9726 "TARGET_SSE4_1"
9727 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
9728 [(set_attr "type" "ssemov")
9729 (set_attr "prefix_extra" "1")
9730 (set_attr "prefix" "maybe_vex")
9731 (set_attr "mode" "TI")])
9732
;; SSE4.1 pmovsxdq / pmovzxdq: extends words 0-1 of V4SI operand 1 into a
;; V2DI result.
9733 (define_insn "sse4_1_<code>v2siv2di2"
9734 [(set (match_operand:V2DI 0 "register_operand" "=x")
9735 (any_extend:V2DI
9736 (vec_select:V2SI
9737 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9738 (parallel [(const_int 0)
9739 (const_int 1)]))))]
9740 "TARGET_SSE4_1"
9741 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9742 [(set_attr "type" "ssemov")
9743 (set_attr "prefix_extra" "1")
9744 (set_attr "prefix" "maybe_vex")
9745 (set_attr "mode" "TI")])
9746
9747 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
9748 ;; setting FLAGS_REG, but they are not really compare instructions.
;; Iterated over AVXMODEF2P.  The only output is FLAGS_REG in CCmode, set
;; from UNSPEC_VTESTP of operands 0 and 1; both operands are inputs.
9749 (define_insn "avx_vtest<ssemodesuffix><avxmodesuffix>"
9750 [(set (reg:CC FLAGS_REG)
9751 (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
9752 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9753 UNSPEC_VTESTP))]
9754 "TARGET_AVX"
9755 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9756 [(set_attr "type" "ssecomi")
9757 (set_attr "prefix_extra" "1")
9758 (set_attr "prefix" "vex")
9759 (set_attr "mode" "<MODE>")])
9760
9761 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
9762 ;; but it is not really a compare instruction.
;; 256-bit vptest on V4DI operands (mode attribute OI).  The only output
;; is FLAGS_REG in CCmode, set from UNSPEC_PTEST of operands 0 and 1.
9763 (define_insn "avx_ptest256"
9764 [(set (reg:CC FLAGS_REG)
9765 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9766 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9767 UNSPEC_PTEST))]
9768 "TARGET_AVX"
9769 "vptest\t{%1, %0|%0, %1}"
9770 [(set_attr "type" "ssecomi")
9771 (set_attr "prefix_extra" "1")
9772 (set_attr "prefix" "vex")
9773 (set_attr "mode" "OI")])
9774
;; 128-bit ptest on V2DI operands: sets FLAGS_REG (CCmode) from
;; UNSPEC_PTEST of operands 0 and 1.  One pattern serves both encodings
;; (%v template prefix, prefix attribute maybe_vex).
9775 (define_insn "sse4_1_ptest"
9776 [(set (reg:CC FLAGS_REG)
9777 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9778 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9779 UNSPEC_PTEST))]
9780 "TARGET_SSE4_1"
9781 "%vptest\t{%1, %0|%0, %1}"
9782 [(set_attr "type" "ssecomi")
9783 (set_attr "prefix_extra" "1")
9784 (set_attr "prefix" "maybe_vex")
9785 (set_attr "mode" "TI")])
9786
;; 256-bit packed vround*, iterated over AVX256MODEF2P (UNSPEC_ROUND).
;; Operand 2 is an immediate accepted by const_0_to_15_operand.
9787 (define_insn "avx_round<ssemodesuffix>256"
9788 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
9789 (unspec:AVX256MODEF2P
9790 [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
9791 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9792 UNSPEC_ROUND))]
9793 "TARGET_AVX"
9794 "vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9795 [(set_attr "type" "ssecvt")
9796 (set_attr "prefix_extra" "1")
9797 (set_attr "length_immediate" "1")
9798 (set_attr "prefix" "vex")
9799 (set_attr "mode" "<MODE>")])
9800
;; 128-bit packed round*, iterated over SSEMODEF2P (UNSPEC_ROUND) and
;; enabled by TARGET_ROUND.  Operand 2 is an immediate accepted by
;; const_0_to_15_operand.  One pattern serves both encodings (%v template
;; prefix, prefix attribute maybe_vex).
9801 (define_insn "sse4_1_round<ssemodesuffix>"
9802 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9803 (unspec:SSEMODEF2P
9804 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
9805 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9806 UNSPEC_ROUND))]
9807 "TARGET_ROUND"
9808 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9809 [(set_attr "type" "ssecvt")
9810 (set_attr "prefix_data16" "1")
9811 (set_attr "prefix_extra" "1")
9812 (set_attr "length_immediate" "1")
9813 (set_attr "prefix" "maybe_vex")
9814 (set_attr "mode" "<MODE>")])
9815
;; AVX scalar vround*, iterated over SSEMODEF2P.  The vec_merge with mask
;; (const_int 1) takes element 0 from the rounded operand 2 (UNSPEC_ROUND)
;; and the remaining elements from operand 1, which need not share the
;; destination register.
9816 (define_insn "*avx_round<ssescalarmodesuffix>"
9817 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9818 (vec_merge:SSEMODEF2P
9819 (unspec:SSEMODEF2P
9820 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9821 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9822 UNSPEC_ROUND)
9823 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9824 (const_int 1)))]
9825 "TARGET_AVX"
9826 "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9827 [(set_attr "type" "ssecvt")
9828 (set_attr "prefix_extra" "1")
9829 (set_attr "length_immediate" "1")
9830 (set_attr "prefix" "vex")
9831 (set_attr "mode" "<MODE>")])
9832
;; Legacy-encoded scalar round*, iterated over SSEMODEF2P and enabled by
;; TARGET_ROUND.  Element 0 comes from the rounded operand 2
;; (UNSPEC_ROUND); the other elements come from operand 1, which is tied
;; to the destination ("0").
9833 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9834 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9835 (vec_merge:SSEMODEF2P
9836 (unspec:SSEMODEF2P
9837 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
9838 (match_operand:SI 3 "const_0_to_15_operand" "n")]
9839 UNSPEC_ROUND)
9840 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9841 (const_int 1)))]
9842 "TARGET_ROUND"
9843 "round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9844 [(set_attr "type" "ssecvt")
9845 (set_attr "prefix_data16" "1")
9846 (set_attr "prefix_extra" "1")
9847 (set_attr "length_immediate" "1")
9848 (set_attr "mode" "<MODE>")])
9849
9850 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9851 ;;
9852 ;; Intel SSE4.2 string/text processing instructions
9853 ;;
9854 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9855
;; Combined pcmpestr pattern: one UNSPEC_PCMPESTR computation sets SImode
;; operand 0 (constraint "c"), V16QI operand 1 (constraint "Yz") and
;; FLAGS_REG.  It is valid only while can_create_pseudo_p () holds and is
;; never output directly (template "#", split condition "&& 1").
;; The splitter looks at the REG_UNUSED notes on curr_insn and emits
;;   - sse4_2_pcmpestri if operand 0 is used,
;;   - sse4_2_pcmpestrm if operand 1 is used,
;;   - sse4_2_pcmpestr_cconly if only the flags are used,
;; so both the index and the mask insn are emitted when both results are
;; live.  NULL is passed for the two scratch operands of the cconly form.
9856 (define_insn_and_split "sse4_2_pcmpestr"
9857 [(set (match_operand:SI 0 "register_operand" "=c,c")
9858 (unspec:SI
9859 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9860 (match_operand:SI 3 "register_operand" "a,a")
9861 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9862 (match_operand:SI 5 "register_operand" "d,d")
9863 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9864 UNSPEC_PCMPESTR))
9865 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9866 (unspec:V16QI
9867 [(match_dup 2)
9868 (match_dup 3)
9869 (match_dup 4)
9870 (match_dup 5)
9871 (match_dup 6)]
9872 UNSPEC_PCMPESTR))
9873 (set (reg:CC FLAGS_REG)
9874 (unspec:CC
9875 [(match_dup 2)
9876 (match_dup 3)
9877 (match_dup 4)
9878 (match_dup 5)
9879 (match_dup 6)]
9880 UNSPEC_PCMPESTR))]
9881 "TARGET_SSE4_2
9882 && can_create_pseudo_p ()"
9883 "#"
9884 "&& 1"
9885 [(const_int 0)]
9886 {
9887 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9888 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9889 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9890
9891 if (ecx)
9892 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9893 operands[3], operands[4],
9894 operands[5], operands[6]));
9895 if (xmm0)
9896 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9897 operands[3], operands[4],
9898 operands[5], operands[6]));
9899 if (flags && !(ecx || xmm0))
9900 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9901 operands[2], operands[3],
9902 operands[4], operands[5],
9903 operands[6]));
9904 DONE;
9905 }
9906 [(set_attr "type" "sselog")
9907 (set_attr "prefix_data16" "1")
9908 (set_attr "prefix_extra" "1")
9909 (set_attr "length_immediate" "1")
9910 (set_attr "memory" "none,load")
9911 (set_attr "mode" "TI")])
9912
;; pcmpestri: index result in SImode operand 0 (constraint "c") plus
;; FLAGS_REG, both from UNSPEC_PCMPESTR.  Operands 2 and 4 are constrained
;; to "a" and "d" and are not printed in the template.  The two
;; alternatives differ only in operand 3 being a register or memory
;; (memory attribute none,load).
9913 (define_insn "sse4_2_pcmpestri"
9914 [(set (match_operand:SI 0 "register_operand" "=c,c")
9915 (unspec:SI
9916 [(match_operand:V16QI 1 "register_operand" "x,x")
9917 (match_operand:SI 2 "register_operand" "a,a")
9918 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9919 (match_operand:SI 4 "register_operand" "d,d")
9920 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9921 UNSPEC_PCMPESTR))
9922 (set (reg:CC FLAGS_REG)
9923 (unspec:CC
9924 [(match_dup 1)
9925 (match_dup 2)
9926 (match_dup 3)
9927 (match_dup 4)
9928 (match_dup 5)]
9929 UNSPEC_PCMPESTR))]
9930 "TARGET_SSE4_2"
9931 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9932 [(set_attr "type" "sselog")
9933 (set_attr "prefix_data16" "1")
9934 (set_attr "prefix_extra" "1")
9935 (set_attr "prefix" "maybe_vex")
9936 (set_attr "length_immediate" "1")
9937 (set_attr "memory" "none,load")
9938 (set_attr "mode" "TI")])
9939
;; pcmpestrm: mask result in V16QI operand 0 (constraint "Yz") plus
;; FLAGS_REG, both from UNSPEC_PCMPESTR.  Operands 2 and 4 are constrained
;; to "a" and "d" and are not printed in the template.  The two
;; alternatives differ only in operand 3 being a register or memory.
9940 (define_insn "sse4_2_pcmpestrm"
9941 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9942 (unspec:V16QI
9943 [(match_operand:V16QI 1 "register_operand" "x,x")
9944 (match_operand:SI 2 "register_operand" "a,a")
9945 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
9946 (match_operand:SI 4 "register_operand" "d,d")
9947 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
9948 UNSPEC_PCMPESTR))
9949 (set (reg:CC FLAGS_REG)
9950 (unspec:CC
9951 [(match_dup 1)
9952 (match_dup 2)
9953 (match_dup 3)
9954 (match_dup 4)
9955 (match_dup 5)]
9956 UNSPEC_PCMPESTR))]
9957 "TARGET_SSE4_2"
9958 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9959 [(set_attr "type" "sselog")
9960 (set_attr "prefix_data16" "1")
9961 (set_attr "prefix_extra" "1")
9962 (set_attr "length_immediate" "1")
9963 (set_attr "prefix" "maybe_vex")
9964 (set_attr "memory" "none,load")
9965 (set_attr "mode" "TI")])
9966
;; Flags-only pcmpestr: sets FLAGS_REG from UNSPEC_PCMPESTR and clobbers
;; two scratches.  Four alternatives: the first two clobber the "Yz"
;; scratch and print pcmpestrm, the last two clobber the "c" scratch and
;; print pcmpestri; within each pair operand 4 is a register or memory
;; (memory attribute none,load,none,load).
9967 (define_insn "sse4_2_pcmpestr_cconly"
9968 [(set (reg:CC FLAGS_REG)
9969 (unspec:CC
9970 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
9971 (match_operand:SI 3 "register_operand" "a,a,a,a")
9972 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
9973 (match_operand:SI 5 "register_operand" "d,d,d,d")
9974 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
9975 UNSPEC_PCMPESTR))
9976 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
9977 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
9978 "TARGET_SSE4_2"
9979 "@
9980 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9981 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
9982 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9983 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9984 [(set_attr "type" "sselog")
9985 (set_attr "prefix_data16" "1")
9986 (set_attr "prefix_extra" "1")
9987 (set_attr "length_immediate" "1")
9988 (set_attr "memory" "none,load,none,load")
9989 (set_attr "prefix" "maybe_vex")
9990 (set_attr "mode" "TI")])
9991
;; Combined pcmpistr pattern: one UNSPEC_PCMPISTR computation sets SImode
;; operand 0 (constraint "c"), V16QI operand 1 (constraint "Yz") and
;; FLAGS_REG.  It is valid only while can_create_pseudo_p () holds and is
;; never output directly (template "#", split condition "&& 1").
;; The splitter looks at the REG_UNUSED notes on curr_insn and emits
;;   - sse4_2_pcmpistri if operand 0 is used,
;;   - sse4_2_pcmpistrm if operand 1 is used,
;;   - sse4_2_pcmpistr_cconly if only the flags are used.
;; NULL is passed for the two scratch operands of the cconly form.
9992 (define_insn_and_split "sse4_2_pcmpistr"
9993 [(set (match_operand:SI 0 "register_operand" "=c,c")
9994 (unspec:SI
9995 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9996 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
9997 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
9998 UNSPEC_PCMPISTR))
9999 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10000 (unspec:V16QI
10001 [(match_dup 2)
10002 (match_dup 3)
10003 (match_dup 4)]
10004 UNSPEC_PCMPISTR))
10005 (set (reg:CC FLAGS_REG)
10006 (unspec:CC
10007 [(match_dup 2)
10008 (match_dup 3)
10009 (match_dup 4)]
10010 UNSPEC_PCMPISTR))]
10011 "TARGET_SSE4_2
10012 && can_create_pseudo_p ()"
10013 "#"
10014 "&& 1"
10015 [(const_int 0)]
10016 {
10017 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10018 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10019 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10020
10021 if (ecx)
10022 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10023 operands[3], operands[4]));
10024 if (xmm0)
10025 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10026 operands[3], operands[4]));
10027 if (flags && !(ecx || xmm0))
10028 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10029 operands[2], operands[3],
10030 operands[4]));
10031 DONE;
10032 }
10033 [(set_attr "type" "sselog")
10034 (set_attr "prefix_data16" "1")
10035 (set_attr "prefix_extra" "1")
10036 (set_attr "length_immediate" "1")
10037 (set_attr "memory" "none,load")
10038 (set_attr "mode" "TI")])
10039
;; Implicit-length string compare returning its SI result in operand 0
;; (constraint "c") and also setting FLAGS_REG from the same three inputs.
(define_insn "sse4_2_pcmpistri"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1) (match_dup 2) (match_dup 3)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
10062
;; Implicit-length string compare returning its V16QI result in operand 0
;; (constraint "Yz") and also setting FLAGS_REG from the same three inputs.
(define_insn "sse4_2_pcmpistrm"
  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1) (match_dup 2) (match_dup 3)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
10085
;; Flags-only form of the implicit-length string compare.  The only result
;; is FLAGS_REG; operand 0 (V16QI) and operand 1 (SI) are scratch clobbers.
;; Alternatives 0 and 1 emit pcmpistrm, alternatives 2 and 3 emit pcmpistri;
;; within each pair the second alternative takes operand 3 from memory.
(define_insn "sse4_2_pcmpistr_cconly"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
           (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
          UNSPEC_PCMPISTR))
   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
   (clobber (match_scratch:SI 1 "= X, X,c,c"))]
  "TARGET_SSE4_2"
  "@
   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load,none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
10108
10109 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10110 ;;
10111 ;; XOP instructions
10112 ;;
10113 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10114
10115 ;; XOP parallel integer multiply/add instructions.
10116 ;; Note the XOP multiply/add instructions
10117 ;; a[i] = b[i] * c[i] + d[i];
10118 ;; do not allow the value being added to be a memory operation.
;; V8HI multiply/accumulate: op0 = op1 * op2 + op3 (wrapping add).
(define_insn "xop_pmacsww"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                     (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10130
;; V8HI multiply/accumulate with a signed-saturating add (ss_plus):
;; op0 = ss_plus (op1 * op2, op3).
(define_insn "xop_pmacssww"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (ss_plus:V8HI
          (mult:V8HI
            (match_operand:V8HI 1 "nonimmediate_operand" "%x")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10141
;; V4SI multiply/accumulate: op0 = op1 * op2 + op3 (wrapping add).
(define_insn "xop_pmacsdd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                     (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10153
;; V4SI multiply/accumulate with a signed-saturating add (ss_plus):
;; op0 = ss_plus (op1 * op2, op3).
(define_insn "xop_pmacssdd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (mult:V4SI
            (match_operand:V4SI 1 "nonimmediate_operand" "%x")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10164
;; Widening multiply/accumulate on elements 1 and 3 of two V4SI inputs:
;; both selected V2SI halves are sign-extended to V2DI, multiplied, and
;; added to operand 3 with signed saturation (ss_plus).
;; Both multiplicands must be sign-extended so that every operand of the
;; mult:V2DI really has mode V2DI, exactly as in the sibling pattern
;; xop_pmacsdql.
(define_insn "xop_pmacssdql"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (ss_plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10183
;; Widening multiply/accumulate on elements 0 and 2 of two V4SI inputs:
;; both selected V2SI halves are sign-extended to V2DI, multiplied, and
;; added to operand 3 with signed saturation (ss_plus).
(define_insn "xop_pmacssdqh"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (ss_plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 0) (const_int 2)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10203
;; Widening multiply/accumulate on elements 1 and 3 of two V4SI inputs:
;; both selected V2SI halves are sign-extended to V2DI, multiplied, and
;; added to operand 3 (wrapping add).
(define_insn "xop_pmacsdql"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10223
;; XOP has no plain 32-bit parallel widening multiply, so one is faked with
;; a multiply/add whose addend is zero.  The split (taken once
;; reload_completed holds) first loads zero into operand 0 and then
;; accumulates the product of elements 1 and 3 into it.  Because operand 0
;; is written before operands 1 and 2 are read, it is marked early-clobber
;; ("=&x") so it can never be the same register as either input.
(define_insn_and_split "xop_mulv2div2di3_low"
  [(set (match_operand:V2DI 0 "register_operand" "=&x")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "register_operand" "%x")
              (parallel [(const_int 1) (const_int 3)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 3))
   (set (match_dup 0)
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 3)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_dup 2)
                (parallel [(const_int 1) (const_int 3)]))))
          (match_dup 0)))]
{
  /* Operand 3 is the zero vector used to initialise the accumulator.  */
  operands[3] = CONST0_RTX (V2DImode);
}
  [(set_attr "type" "ssemul")
   (set_attr "mode" "TI")])
10265
;; Widening multiply/accumulate on elements 0 and 2 of two V4SI inputs:
;; both selected V2SI halves are sign-extended to V2DI, multiplied, and
;; added to operand 3 (wrapping add).
(define_insn "xop_pmacsdqh"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 0) (const_int 2)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10285
;; XOP has no plain 32-bit parallel widening multiply, so one is faked with
;; a multiply/add whose addend is zero.  The split (taken once
;; reload_completed holds) first loads zero into operand 0 and then
;; accumulates the product of elements 0 and 2 into it.  Because operand 0
;; is written before operands 1 and 2 are read, it is marked early-clobber
;; ("=&x") so it can never be the same register as either input.
(define_insn_and_split "xop_mulv2div2di3_high"
  [(set (match_operand:V2DI 0 "register_operand" "=&x")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "register_operand" "%x")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_XOP"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 3))
   (set (match_dup 0)
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_dup 1)
                (parallel [(const_int 0) (const_int 2)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_dup 2)
                (parallel [(const_int 0) (const_int 2)]))))
          (match_dup 0)))]
{
  /* Operand 3 is the zero vector used to initialise the accumulator.  */
  operands[3] = CONST0_RTX (V2DImode);
}
  [(set_attr "type" "ssemul")
   (set_attr "mode" "TI")])
10327
;; XOP parallel integer multiply/add instructions for the intrinsics
;; Widening multiply/accumulate on the odd elements (1, 3, 5, 7) of two
;; V8HI inputs: the selected V4HI halves are sign-extended to V4SI,
;; multiplied, and added to operand 3 with signed saturation (ss_plus).
(define_insn "xop_pmacsswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10352
;; Widening multiply/accumulate on the odd elements (1, 3, 5, 7) of two
;; V8HI inputs: the selected V4HI halves are sign-extended to V4SI,
;; multiplied, and added to operand 3 (wrapping add).
(define_insn "xop_pmacswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10376
;; Multiply, add adjacent products, then accumulate.  The even elements
;; (0, 2, 4, 6) and the odd elements (1, 3, 5, 7) of the two V8HI inputs
;; are each sign-extended to V4SI and multiplied; the two product vectors
;; are summed, and that sum is added to operand 3 with signed saturation.
(define_insn "xop_pmadcsswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (plus:V4SI
            ;; Products of the even elements.
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)]))))
            ;; Products of the odd elements.
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 2)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10416
;; Multiply, add adjacent products, then accumulate.  The even elements
;; (0, 2, 4, 6) and the odd elements (1, 3, 5, 7) of the two V8HI inputs
;; are each sign-extended to V4SI and multiplied; the two product vectors
;; are summed, and that sum is added to operand 3 (wrapping add).
(define_insn "xop_pmadcswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            ;; Products of the even elements.
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)]))))
            ;; Products of the odd elements.
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 2)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10456
10457 ;; XOP parallel XMM conditional moves
;; Conditional move over every SSEMODE vector mode: operand 3 is the
;; if_then_else selector choosing between operand 1 and operand 2.
;; The two alternatives allow a memory operand in either operand 2 or
;; operand 3, never both.
(define_insn "xop_pcmov_<mode>"
  [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
        (if_then_else:SSEMODE
          (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
          (match_operand:SSEMODE 1 "vector_move_operand" "x,x")
          (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))]
  "TARGET_XOP"
  "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")])
10467
;; Conditional move over every AVX256MODE vector mode: operand 3 is the
;; if_then_else selector choosing between operand 1 and operand 2.
;; The two alternatives allow a memory operand in either operand 2 or
;; operand 3, never both.
(define_insn "xop_pcmov_<mode>256"
  [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
        (if_then_else:AVX256MODE
          (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
          (match_operand:AVX256MODE 1 "vector_move_operand" "x,x")
          (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))]
  "TARGET_XOP"
  "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")])
10477
10478 ;; XOP horizontal add/subtract instructions
;; Signed horizontal add, bytes to words: each V8HI result element is the
;; sum of an even byte and the following odd byte, both sign-extended.
(define_insn "xop_phaddbw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_operand:V16QI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)
                         (const_int 9) (const_int 11)
                         (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphaddbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10507
;; Signed horizontal add, bytes to doublewords: each V4SI result element
;; is the sum of four consecutive sign-extended bytes (result element i
;; sums bytes 4i .. 4i+3).
(define_insn "xop_phaddbd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)
                           (const_int 8) (const_int 12)])))
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)
                           (const_int 9) (const_int 13)]))))
          (plus:V4SI
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)
                           (const_int 10) (const_int 14)])))
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)
                           (const_int 11) (const_int 15)]))))))]
  "TARGET_XOP"
  "vphaddbd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10544
;; Signed horizontal add, bytes to quadwords: result element 0 is the sum
;; of sign-extended bytes 0..7 and result element 1 the sum of bytes 8..15.
;; Every vec_select therefore pairs byte k (for element 0) with byte k+8
;; (for element 1).  Pairing k with k+4 instead would describe a result
;; that mixes bytes from both 8-byte halves of the source.
(define_insn "xop_phaddbq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 8)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 9)]))))
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 2) (const_int 10)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 3) (const_int 11)])))))
          (plus:V2DI
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 4) (const_int 12)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 5) (const_int 13)]))))
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 6) (const_int 14)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 7) (const_int 15)])))))))]
  "TARGET_XOP"
  "vphaddbq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10597
;; Signed horizontal add, words to doublewords: each V4SI result element
;; is the sum of an even word and the following odd word, sign-extended.
(define_insn "xop_phaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_operand:V8HI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphaddwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10618
;; Signed horizontal add, words to quadwords: result element 0 sums
;; sign-extended words 0..3 and result element 1 sums words 4..7.
(define_insn "xop_phaddwq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_operand:V8HI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)])))
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)]))))
          (plus:V2DI
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)])))
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)]))))))]
  "TARGET_XOP"
  "vphaddwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10647
;; Signed horizontal add, doublewords to quadwords: each V2DI result
;; element is the sum of an even doubleword and the following odd
;; doubleword, both sign-extended.
(define_insn "xop_phadddq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphadddq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10664
;; Unsigned horizontal add, bytes to words: each V8HI result element is
;; the sum of an even byte and the following odd byte, both zero-extended.
(define_insn "xop_phaddubw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (zero_extend:V8HI
            (vec_select:V8QI
              (match_operand:V16QI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (zero_extend:V8HI
            (vec_select:V8QI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)
                         (const_int 9) (const_int 11)
                         (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphaddubw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10693
;; Unsigned horizontal add, bytes to doublewords: each V4SI result element
;; is the sum of four consecutive zero-extended bytes (result element i
;; sums bytes 4i .. 4i+3).
(define_insn "xop_phaddubd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)
                           (const_int 8) (const_int 12)])))
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)
                           (const_int 9) (const_int 13)]))))
          (plus:V4SI
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)
                           (const_int 10) (const_int 14)])))
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)
                           (const_int 11) (const_int 15)]))))))]
  "TARGET_XOP"
  "vphaddubd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10730
;; Unsigned horizontal add, bytes to quadwords: result element 0 is the
;; sum of zero-extended bytes 0..7 and result element 1 the sum of bytes
;; 8..15.  Every term is zero-extended (this is the unsigned variant, so
;; no term may be sign-extended), and every vec_select pairs byte k (for
;; element 0) with byte k+8 (for element 1).
(define_insn "xop_phaddubq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (plus:V2DI
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 8)])))
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 9)]))))
            (plus:V2DI
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 2) (const_int 10)])))
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 3) (const_int 11)])))))
          (plus:V2DI
            (plus:V2DI
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 4) (const_int 12)])))
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 5) (const_int 13)]))))
            (plus:V2DI
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 6) (const_int 14)])))
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 7) (const_int 15)])))))))]
  "TARGET_XOP"
  "vphaddubq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10783
;; Unsigned horizontal add, words to doublewords: each V4SI result element
;; is the sum of an even word and the following odd word, zero-extended.
(define_insn "xop_phadduwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (zero_extend:V4SI
            (vec_select:V4HI
              (match_operand:V8HI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (zero_extend:V4SI
            (vec_select:V4HI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphadduwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10804
;; Unsigned horizontal add, words to quadwords: result element 0 sums
;; zero-extended words 0..3 and result element 1 sums words 4..7.
(define_insn "xop_phadduwq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (zero_extend:V2DI
              (vec_select:V2HI
                (match_operand:V8HI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)])))
            (zero_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)]))))
          (plus:V2DI
            (zero_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)])))
            (zero_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)]))))))]
  "TARGET_XOP"
  "vphadduwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10833
;; Unsigned horizontal add, doublewords to quadwords: each V2DI result
;; element is the sum of an even doubleword and the following odd
;; doubleword, both zero-extended.
(define_insn "xop_phaddudq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphaddudq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10850
;; Signed horizontal subtract, bytes to words: each V8HI result element is
;; the sign-extended even byte minus the sign-extended following odd byte.
(define_insn "xop_phsubbw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (minus:V8HI
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_operand:V16QI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)
                         (const_int 9) (const_int 11)
                         (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphsubbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10879
;; Signed horizontal subtract, words to doublewords: each V4SI result
;; element is the sign-extended even word minus the sign-extended
;; following odd word.
(define_insn "xop_phsubwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (minus:V4SI
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_operand:V8HI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphsubwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10900
;; Signed horizontal subtract, doublewords to quadwords: each V2DI result
;; element is the sign-extended even doubleword minus the sign-extended
;; following odd doubleword.
(define_insn "xop_phsubdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (minus:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphsubdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10917
10918 ;; XOP permute instructions
;; Byte permute expressed as an opaque unspec over three V16QI inputs.
;; Either operand 2 or operand 3 may be in memory, but the insn condition
;; rejects the case where both are.
(define_insn "xop_pperm"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
           (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
          UNSPEC_XOP_PERMUTE))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
10930
10931 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI vectors into one V4SI by truncating each element and
;; concatenating the halves.  Operand 3 (V16QI) is only a "use" in the
;; RTL and is passed to vpperm as its last source operand.
(define_insn "xop_pperm_pack_v2di_v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_concat:V4SI
          (truncate:V2SI (match_operand:V2DI 1 "register_operand" "x,x"))
          (truncate:V2SI (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
10944
;; Pack two V4SI vectors into one V8HI by truncating each element and
;; concatenating the halves.  Operand 3 (V16QI) is only a "use" in the
;; RTL and is passed to vpperm as its last source operand.
(define_insn "xop_pperm_pack_v4si_v8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          (truncate:V4HI (match_operand:V4SI 1 "register_operand" "x,x"))
          (truncate:V4HI (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
10957
;; Pack two V8HI vectors into one V16QI by truncating each element and
;; concatenating the halves.  Operand 3 (V16QI) is only a "use" in the
;; RTL and is passed to vpperm as its last source operand.
(define_insn "xop_pperm_pack_v8hi_v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
        (vec_concat:V16QI
          (truncate:V8QI (match_operand:V8HI 1 "register_operand" "x,x"))
          (truncate:V8QI (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
10970
10971 ;; XOP packed rotate instructions
;; Rotate-left expander.  When operand 2 satisfies the immediate
;; predicate, the RTL template is used as written.  Otherwise the
;; count is broadcast into a vector register and the variable-count
;; pattern xop_vrotl<mode>3 is emitted instead.
(define_expand "rotl<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "")
        (rotate:SSEMODE1248
          (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
          (match_operand:SI 2 "general_operand")))]
  "TARGET_XOP"
{
  /* A count that is not a valid immediate is turned into a vector with
     the same count in every element.  */
  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
    {
      rtvec elts = rtvec_alloc (<ssescalarnum>);
      rtx init = gen_rtx_PARALLEL (<MODE>mode, elts);
      rtx counts = gen_reg_rtx (<MODE>mode);
      rtx amount = operands[2];
      int lane;

      /* Bring the count to the element mode of the vector.  */
      if (GET_MODE (amount) != <ssescalarmode>mode)
        {
          amount = gen_reg_rtx (<ssescalarmode>mode);
          convert_move (amount, operands[2], false);
        }

      for (lane = 0; lane < <ssescalarnum>; lane++)
        RTVEC_ELT (elts, lane) = amount;

      emit_insn (gen_vec_init<mode> (counts, init));
      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], counts));
      DONE;
    }
})
11002
;; Rotate-right expander.  When operand 2 satisfies the immediate
;; predicate, the RTL template is used as written.  Otherwise the
;; count is broadcast into a vector register, negated, and fed to the
;; variable-count pattern xop_vrotl<mode>3.
(define_expand "rotr<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "")
        (rotatert:SSEMODE1248
          (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
          (match_operand:SI 2 "general_operand")))]
  "TARGET_XOP"
{
  /* A count that is not a valid immediate is turned into a vector with
     the same count in every element.  */
  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
    {
      rtvec elts = rtvec_alloc (<ssescalarnum>);
      rtx init = gen_rtx_PARALLEL (<MODE>mode, elts);
      rtx neg_counts = gen_reg_rtx (<MODE>mode);
      rtx counts = gen_reg_rtx (<MODE>mode);
      rtx amount = operands[2];
      int lane;

      /* Bring the count to the element mode of the vector.  */
      if (GET_MODE (amount) != <ssescalarmode>mode)
        {
          amount = gen_reg_rtx (<ssescalarmode>mode);
          convert_move (amount, operands[2], false);
        }

      for (lane = 0; lane < <ssescalarnum>; lane++)
        RTVEC_ELT (elts, lane) = amount;

      emit_insn (gen_vec_init<mode> (counts, init));
      /* The right rotate is done as a variable rotate by the negated
         counts.  */
      emit_insn (gen_neg<mode>2 (neg_counts, counts));
      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg_counts));
      DONE;
    }
})
11035
;; Rotate left by an immediate count (operand 2), applied to every element
;; of operand 1.
(define_insn "xop_rotl<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
        (rotate:SSEMODE1248
          (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
          (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
  "TARGET_XOP"
  "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11046
;; Rotate right by an immediate count (operand 2), applied to every
;; element of operand 1.  It is emitted as a left rotate by the
;; complementary count, which is the element width in bits minus
;; operand 2.
(define_insn "xop_rotr<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
        (rotatert:SSEMODE1248
          (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
          (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
  "TARGET_XOP"
{
  /* The complement must be taken against the element width in bits.
     The element count times 8 is not that width: it only happens to
     equal it for four 32-bit elements and is wrong for byte, word and
     quadword vectors.  */
  operands[3]
    = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
  return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
}
  [(set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11060
;; Rotate right by a vector of per-element counts: negate the counts
;; and hand them to xop_vrotl<mode>3.
(define_expand "vrotr<mode>3"
  [(match_operand:SSEMODE1248 0 "register_operand" "")
   (match_operand:SSEMODE1248 1 "register_operand" "")
   (match_operand:SSEMODE1248 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx neg_counts = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neg<mode>2 (neg_counts, operands[2]));
  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg_counts));
  DONE;
})
11072
;; Rotate left by a vector of per-element counts; forwards its three
;; operands unchanged to xop_vrotl<mode>3.
(define_expand "vrotl<mode>3"
  [(match_operand:SSEMODE1248 0 "register_operand" "")
   (match_operand:SSEMODE1248 1 "register_operand" "")
   (match_operand:SSEMODE1248 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx rot = gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]);

  emit_insn (rot);
  DONE;
})
11082
;; Variable rotate.  Where operand 2 is >= 0 the result is operand 1
;; rotated left by operand 2; otherwise it is operand 1 rotated right
;; by the negation of operand 2.  At most one of the two inputs may be
;; in memory.
(define_insn "xop_vrotl<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
        (if_then_else:SSEMODE1248
          (ge:SSEMODE1248
            (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
            (const_int 0))
          (rotate:SSEMODE1248
            (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
            (match_dup 2))
          (rotatert:SSEMODE1248
            (match_dup 1)
            (neg:SSEMODE1248 (match_dup 2)))))]
  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "mode" "TI")])
11101
11102 ;; XOP packed shift instructions.
11103 ;; FIXME: add V2DI back in
;; Logical right shift by a vector of per-element counts: negate the
;; counts and emit xop_lshl<mode>3 with them.
(define_expand "vlshr<mode>3"
  [(match_operand:SSEMODE124 0 "register_operand" "")
   (match_operand:SSEMODE124 1 "register_operand" "")
   (match_operand:SSEMODE124 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx neg_counts = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neg<mode>2 (neg_counts, operands[2]));
  emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg_counts));
  DONE;
})
11115
;; Arithmetic right shift by a vector of per-element counts: negate the
;; counts and emit xop_ashl<mode>3 with them.
(define_expand "vashr<mode>3"
  [(match_operand:SSEMODE124 0 "register_operand" "")
   (match_operand:SSEMODE124 1 "register_operand" "")
   (match_operand:SSEMODE124 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx neg_counts = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neg<mode>2 (neg_counts, operands[2]));
  emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg_counts));
  DONE;
})
11127
;; Left shift by a vector of per-element counts; forwards its three
;; operands unchanged to xop_ashl<mode>3.
(define_expand "vashl<mode>3"
  [(match_operand:SSEMODE124 0 "register_operand" "")
   (match_operand:SSEMODE124 1 "register_operand" "")
   (match_operand:SSEMODE124 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx shift = gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]);

  emit_insn (shift);
  DONE;
})
11137
;; Variable arithmetic shift.  Where operand 2 is >= 0 the result is
;; operand 1 shifted left by operand 2; otherwise it is operand 1
;; shifted right arithmetically by the negation of operand 2.  At most
;; one of the two inputs may be in memory.
(define_insn "xop_ashl<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
        (if_then_else:SSEMODE1248
          (ge:SSEMODE1248
            (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
            (const_int 0))
          (ashift:SSEMODE1248
            (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
            (match_dup 2))
          (ashiftrt:SSEMODE1248
            (match_dup 1)
            (neg:SSEMODE1248 (match_dup 2)))))]
  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "mode" "TI")])
11156
;; Variable logical shift.  Where operand 2 is >= 0 the result is
;; operand 1 shifted left by operand 2; otherwise it is operand 1
;; shifted right logically by the negation of operand 2.  At most one
;; of the two inputs may be in memory.
(define_insn "xop_lshl<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
        (if_then_else:SSEMODE1248
          (ge:SSEMODE1248
            (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
            (const_int 0))
          (ashift:SSEMODE1248
            (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
            (match_dup 2))
          (lshiftrt:SSEMODE1248
            (match_dup 1)
            (neg:SSEMODE1248 (match_dup 2)))))]
  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "mode" "TI")])
11175
;; SSE2 doesn't have some shift variants, so define versions for XOP.
;; V16QI left shift by a scalar count: replicate the count into all
;; sixteen elements of a vector and emit xop_ashlv16qi3.
(define_expand "ashlv16qi3"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")
   (match_operand:SI 2 "nonmemory_operand" "")]
  "TARGET_XOP"
{
  rtvec elts = rtvec_alloc (16);
  rtx bcast = gen_rtx_PARALLEL (V16QImode, elts);
  rtx counts = gen_reg_rtx (V16QImode);
  int idx;

  /* Fill the initializer with sixteen copies of the count.  */
  for (idx = 16; idx-- > 0; )
    RTVEC_ELT (elts, idx) = operands[2];

  emit_insn (gen_vec_initv16qi (counts, bcast));
  emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], counts));
  DONE;
})
11194
;; V16QI shift by a scalar count using the logical XOP shift: replicate
;; the count into all sixteen elements of a vector and emit
;; xop_lshlv16qi3.
(define_expand "lshlv16qi3"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")
   (match_operand:SI 2 "nonmemory_operand" "")]
  "TARGET_XOP"
{
  rtvec elts = rtvec_alloc (16);
  rtx bcast = gen_rtx_PARALLEL (V16QImode, elts);
  rtx counts = gen_reg_rtx (V16QImode);
  int idx;

  /* Fill the initializer with sixteen copies of the count.  */
  for (idx = 16; idx-- > 0; )
    RTVEC_ELT (elts, idx) = operands[2];

  emit_insn (gen_vec_initv16qi (counts, bcast));
  emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], counts));
  DONE;
})
11212
;; V16QI arithmetic right shift by a scalar count, implemented as
;; xop_ashlv16qi3 with negated counts.  A constant count is negated at
;; expand time; any other count is broadcast first and the resulting
;; vector is negated with negv16qi2.
(define_expand "ashrv16qi3"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")
   (match_operand:SI 2 "nonmemory_operand" "")]
  "TARGET_XOP"
{
  bool const_count_p = CONST_INT_P (operands[2]);
  rtvec elts = rtvec_alloc (16);
  rtx bcast = gen_rtx_PARALLEL (V16QImode, elts);
  rtx counts = gen_reg_rtx (V16QImode);
  rtx shift_by = counts;
  rtx elt = operands[2];
  int idx;

  /* Constants can be negated right away.  */
  if (const_count_p)
    elt = GEN_INT (- INTVAL (operands[2]));

  for (idx = 16; idx-- > 0; )
    RTVEC_ELT (elts, idx) = elt;

  emit_insn (gen_vec_initv16qi (counts, bcast));

  /* Non-constant counts still have to be negated, as a vector.  */
  if (!const_count_p)
    {
      shift_by = gen_reg_rtx (V16QImode);
      emit_insn (gen_negv16qi2 (shift_by, counts));
    }

  emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], shift_by));
  DONE;
})
11243
;; V2DI arithmetic right shift by a scalar count, implemented as
;; xop_ashlv2di3 with both count elements set to the negated scalar.
(define_expand "ashrv2di3"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V2DI 1 "register_operand" "")
   (match_operand:DI 2 "nonmemory_operand" "")]
  "TARGET_XOP"
{
  rtvec elts = rtvec_alloc (2);
  rtx bcast = gen_rtx_PARALLEL (V2DImode, elts);
  rtx counts = gen_reg_rtx (V2DImode);
  rtx amount = operands[2];
  rtx neg_amount;

  if (CONST_INT_P (amount))
    /* Constants are negated at expand time.  */
    neg_amount = GEN_INT (- INTVAL (amount));
  else
    {
      rtx src = amount;

      if (GET_MODE (amount) != DImode)
        {
          /* Convert the count to DImode before negating it.  */
          src = gen_reg_rtx (DImode);
          neg_amount = gen_reg_rtx (DImode);
          convert_move (src, amount, false);
        }
      else
        neg_amount = gen_reg_rtx (DImode);

      emit_insn (gen_negdi2 (neg_amount, src));
    }

  RTVEC_ELT (elts, 0) = neg_amount;
  RTVEC_ELT (elts, 1) = neg_amount;
  emit_insn (gen_vec_initv2di (counts, bcast));
  emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], counts));
  DONE;
})
11276
11277 ;; XOP FRCZ support
;; vfrcz on operand 1, represented as UNSPEC_FRCZ; the source may be a
;; register or memory.
(define_insn "xop_frcz<mode>2"
  [(set (match_operand:FMAMODE 0 "register_operand" "=x")
        (unspec:FMAMODE
          [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
          UNSPEC_FRCZ))]
  "TARGET_XOP"
  "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt1")
   (set_attr "mode" "<MODE>")])
11287
11288 ;; scalar insns
;; Scalar vfrcz expander: merges element 0 of the UNSPEC_FRCZ result
;; with a zero vector, which is supplied here as operand 3.
(define_expand "xop_vmfrcz<mode>2"
  [(set (match_operand:SSEMODEF2P 0 "register_operand")
        (vec_merge:SSEMODEF2P
          (unspec:SSEMODEF2P
            [(match_operand:SSEMODEF2P 1 "nonimmediate_operand")]
            UNSPEC_FRCZ)
          (match_dup 3)
          (const_int 1)))]
  "TARGET_XOP"
{
  /* The elements not taken from the unspec come from an all-zero
     constant of the vector mode.  */
  operands[3] = CONST0_RTX (<MODE>mode);
})
11301
;; Scalar vfrcz: element 0 comes from the UNSPEC_FRCZ of operand 1, the
;; remaining elements from the zero constant matched as operand 2.
(define_insn "*xop_vmfrcz_<mode>"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (vec_merge:SSEMODEF2P
          (unspec:SSEMODEF2P
            [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
            UNSPEC_FRCZ)
          (match_operand:SSEMODEF2P 2 "const0_operand")
          (const_int 1)))]
  "TARGET_XOP"
  "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt1")
   (set_attr "mode" "<MODE>")])
11314
;; Integer vector comparison of operands 2 and 3 using an operator
;; accepted by ix86_comparison_int_operator.  The operator (operand 1)
;; is printed through the %Y modifier as part of the vpcom mnemonic.
(define_insn "xop_maskcmp<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
        (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
          [(match_operand:SSEMODE1248 2 "register_operand" "x")
           (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
  "TARGET_XOP"
  "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11328
;; Integer vector comparison of operands 2 and 3 using an operator
;; accepted by ix86_comparison_uns_operator; emitted with the "u"
;; (unsigned) form of the vpcom mnemonic.
(define_insn "xop_maskcmp_uns<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
        (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
          [(match_operand:SSEMODE1248 2 "register_operand" "x")
           (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
  "TARGET_XOP"
  "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11342
11343 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11344 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11345 ;; the exact instruction generated for the intrinsic.
;; The comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP so that it is
;; matched only in this form; the emitted instruction is the same
;; unsigned vpcom as for the plain comparison operator.
(define_insn "xop_maskcmp_uns2<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
        (unspec:SSEMODE1248
          [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
             [(match_operand:SSEMODE1248 2 "register_operand" "x")
              (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
          UNSPEC_XOP_UNSIGNED_CMP))]
  "TARGET_XOP"
  "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11360
11361 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11362 ;; being added here to be complete.
;; Operand 3 selects the mnemonic: nonzero emits vpcomtrue, zero emits
;; vpcomfalse.
(define_insn "xop_pcom_tf<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
        (unspec:SSEMODE1248
          [(match_operand:SSEMODE1248 1 "register_operand" "x")
           (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_int_operand" "n")]
          UNSPEC_XOP_TRUEFALSE))]
  "TARGET_XOP"
{
  if (INTVAL (operands[3]) != 0)
    return "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";

  return "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11381
;; vpermil2 with two data sources (operands 1 and 2), a selector vector
;; (operand 3) and a 2-bit immediate (operand 4), represented as
;; UNSPEC_VPERMIL2.
(define_insn "xop_vpermil2<mode>3"
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
        (unspec:AVXMODEF2P
          [(match_operand:AVXMODEF2P 1 "register_operand" "x")
           (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "%x")
           (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm")
           (match_operand:SI 4 "const_0_to_3_operand" "n")]
          UNSPEC_VPERMIL2))]
  "TARGET_XOP"
  "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
  [(set_attr "type" "sse4arg")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<MODE>")])
11395
11396 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded vaesenc: three-operand form, the destination is not tied
;; to either source.
(define_insn "*avx_aesenc"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESENC))]
  "TARGET_AES && TARGET_AVX"
  "vaesenc\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
11408
;; Two-operand aesenc: operand 1 is tied to the destination by the "0"
;; constraint.
(define_insn "aesenc"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESENC))]
  "TARGET_AES"
  "aesenc\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
11419
;; VEX-encoded vaesenclast: three-operand form, the destination is not
;; tied to either source.
(define_insn "*avx_aesenclast"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESENCLAST))]
  "TARGET_AES && TARGET_AVX"
  "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
11431
;; Two-operand aesenclast: operand 1 is tied to the destination by the
;; "0" constraint.
(define_insn "aesenclast"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESENCLAST))]
  "TARGET_AES"
  "aesenclast\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
11442
;; VEX-encoded vaesdec: three-operand form, the destination is not tied
;; to either source.
(define_insn "*avx_aesdec"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESDEC))]
  "TARGET_AES && TARGET_AVX"
  "vaesdec\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
11454
;; Two-operand aesdec: operand 1 is tied to the destination by the "0"
;; constraint.
(define_insn "aesdec"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESDEC))]
  "TARGET_AES"
  "aesdec\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
11465
;; VEX-encoded vaesdeclast: three-operand form, the destination is not
;; tied to either source.
(define_insn "*avx_aesdeclast"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESDECLAST))]
  "TARGET_AES && TARGET_AVX"
  "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
11477
;; Two-operand aesdeclast: operand 1 is tied to the destination by the
;; "0" constraint.
(define_insn "aesdeclast"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESDECLAST))]
  "TARGET_AES"
  "aesdeclast\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
11488
;; aesimc with a single register-or-memory source.  One pattern serves
;; both encodings: the template starts with %v and the prefix attribute
;; is maybe_vex.
(define_insn "aesimc"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
          UNSPEC_AESIMC))]
  "TARGET_AES"
  "%vaesimc\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
11499
;; aeskeygenassist with a register-or-memory source and an 8-bit
;; immediate (operand 2).  One pattern serves both encodings: the
;; template starts with %v and the prefix attribute is maybe_vex.
(define_insn "aeskeygenassist"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
           (match_operand:SI 2 "const_0_to_255_operand" "n")]
          UNSPEC_AESKEYGENASSIST))]
  "TARGET_AES"
  "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
11512
;; VEX-encoded vpclmulqdq: three-operand form plus an 8-bit immediate
;; (operand 3); the destination is not tied to either source.
(define_insn "*vpclmulqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_PCLMUL))]
  "TARGET_PCLMUL && TARGET_AVX"
  "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
11526
;; Two-operand pclmulqdq with an 8-bit immediate (operand 3); operand 1
;; is tied to the destination by the "0" constraint.
(define_insn "pclmulqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_PCLMUL))]
  "TARGET_PCLMUL"
  "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11539
;; Build the PARALLEL for vzeroall: an UNSPECV_VZEROALL marker followed
;; by one SET to zero for each SSE register (16 of them for 64-bit
;; targets, 8 otherwise), each taken in V8SImode.
(define_expand "avx_vzeroall"
  [(match_par_dup 0 [(const_int 0)])]
  "TARGET_AVX"
{
  const int n_sse_regs = TARGET_64BIT ? 16 : 8;
  rtx body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (n_sse_regs + 1));
  int slot;

  /* Slot 0 holds the volatile unspec that identifies the insn.  */
  XVECEXP (body, 0, 0)
    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
                               UNSPECV_VZEROALL);

  /* Slots 1 .. n_sse_regs clear the individual registers.  */
  for (slot = 1; slot <= n_sse_regs; slot++)
    {
      rtx vreg = gen_rtx_REG (V8SImode, SSE_REGNO (slot - 1));

      XVECEXP (body, 0, slot)
        = gen_rtx_SET (VOIDmode, vreg, CONST0_RTX (V8SImode));
    }

  operands[0] = body;
})
11559
;; Matches a PARALLEL accepted by vzeroall_operation whose first
;; element is the UNSPECV_VZEROALL marker, and emits vzeroall.
(define_insn "*avx_vzeroall"
  [(match_parallel 0 "vzeroall_operation"
     [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
  "TARGET_AVX"
  "vzeroall"
  [(set_attr "type" "sse")
   (set_attr "modrm" "0")
   (set_attr "memory" "none")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
11570
11571 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
11572 ;; if the upper 128bits are unused.
;; Operand 0 may be any const_int; it is carried in the volatile unspec
;; but is not printed by the output template.
(define_insn "avx_vzeroupper"
  [(unspec_volatile
     [(match_operand 0 "const_int_operand" "")]
     UNSPECV_VZEROUPPER)]
  "TARGET_AVX"
  "vzeroupper"
  [(set_attr "type" "sse")
   (set_attr "modrm" "0")
   (set_attr "memory" "none")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
11583
;; Broadcast a scalar into every element of a 256-bit vector.  A memory
;; source uses vbroadcast directly.  A register source is split after
;; reload into a half-width vec_duplicate into the low half of the
;; destination register followed by a vec_concat of that half with
;; itself.
(define_insn_and_split "vec_dup<mode>"
  [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
        (vec_duplicate:AVX256MODE24P
          (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
  "TARGET_AVX"
  "@
   vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
   #"
  "&& reload_completed && REG_P (operands[1])"
  [(set (match_dup 2)
        (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
   (set (match_dup 0)
        (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
  "operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11600
;; Duplicate a 128-bit value into both halves of a 256-bit register.
;; Alternative 0 (memory source) uses vbroadcastf128, alternative 1
;; (source tied to the destination) uses vinsertf128 with immediate 1,
;; and alternative 2 (any other register) uses vperm2f128 with
;; immediate 0.
(define_insn "avx_vbroadcastf128_<mode>"
  [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
        (vec_concat:AVX256MODE
          (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
          (match_dup 1)))]
  "TARGET_AVX"
  "@
   vbroadcastf128\t{%1, %0|%0, %1}
   vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
   vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
  [(set_attr "type" "ssemov,sselog1,sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "0,1,1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF,V8SF,V8SF")])
11616
11617 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11618 ;; If it so happens that the input is in memory, use vbroadcast.
11619 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast of element operand 3.  The two memory alternatives
;; load the selected element with vbroadcastss; the register
;; alternative uses vpermilps.
(define_insn "*avx_vperm_broadcast_v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
        (vec_select:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
          (match_parallel 2 "avx_vbroadcast_operand"
            [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
{
  const int elt = INTVAL (operands[3]);

  if (which_alternative == 2)
    {
      /* Register source: repeat the 2-bit element index in all four
         selector fields of the immediate.  */
      operands[2] = GEN_INT (elt * 0x55);
      return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
    }

  if (which_alternative != 0 && which_alternative != 1)
    gcc_unreachable ();

  /* Memory source: address the selected SFmode element directly.  */
  operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
  return "vbroadcastss\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov,ssemov,sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "0,0,1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "SF,SF,V4SF")])
11647
;; 256-bit broadcast of element operand 3, always split after reload.
;; A memory source becomes a vec_duplicate of the addressed scalar; a
;; register source is expanded into an in-lane permute followed by a
;; lane permute.
(define_insn_and_split "*avx_vperm_broadcast_<mode>"
  [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
        (vec_select:AVX256MODEF2P
          (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
          (match_parallel 2 "avx_vbroadcast_operand"
            [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  int elt = INTVAL (operands[3]);

  if (!REG_P (src))
    /* Memory: narrow the reference to the selected scalar and let the
       vec_duplicate pattern above be emitted.  */
    operands[1] = adjust_address_nv (src, <avxscalarmode>mode,
                                     elt * GET_MODE_SIZE (<avxscalarmode>mode));
  else
    {
      int in_lane_mask, lane_mask;

      /* First replicate the wanted element across its 128-bit lane.
         The other lane is permuted the same way, which is harmless.  */
      if (<MODE>mode == V4DFmode)
        in_lane_mask = (elt & 1 ? 15 : 0);
      else
        in_lane_mask = (elt & 3) * 0x55;
      emit_insn (gen_avx_vpermil<mode> (dest, src, GEN_INT (in_lane_mask)));

      /* Then copy the lane holding the element into both lanes.  */
      lane_mask = (elt / (<ssescalarnum> / 2)) * 0x11;
      emit_insn (gen_avx_vperm2f128<mode>3 (dest, dest, dest,
                                            GEN_INT (lane_mask)));
      DONE;
    }
})
11683
;; Double-precision vpermil expander: turn the immediate in operand 2
;; into the explicit PARALLEL of element indices used by vec_select.
;; Each element takes one selector bit.
(define_expand "avx_vpermil<mode>"
  [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
        (vec_select:AVXMODEFDP
          (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
          (match_operand:SI 2 "const_0_to_255_operand" "")))]
  "TARGET_AVX"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[<ssescalarnum>];
  int pos;

  /* Elements 0 and 1 select within indices 0..1.  */
  for (pos = 0; pos < 2; pos++)
    sel[pos] = GEN_INT ((imm >> pos) & 1);

  /* For V4DF, elements 2 and 3 select within indices 2..3.  */
  if (<MODE>mode == V4DFmode)
    for (pos = 2; pos < 4; pos++)
      sel[pos] = GEN_INT (((imm >> pos) & 1) + 2);

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, sel));
})
11705
;; Single-precision vpermil expander: turn the immediate in operand 2
;; into the explicit PARALLEL of element indices used by vec_select.
;; Each of the four 2-bit fields selects one element; for V8SF the same
;; fields are reused for elements 4..7 with the indices offset by 4.
(define_expand "avx_vpermil<mode>"
  [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
        (vec_select:AVXMODEFSP
          (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
          (match_operand:SI 2 "const_0_to_255_operand" "")))]
  "TARGET_AVX"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[<ssescalarnum>];
  int pos;

  for (pos = 0; pos < 4; pos++)
    {
      int field = (imm >> (2 * pos)) & 3;

      sel[pos] = GEN_INT (field);
      if (<MODE>mode == V8SFmode)
        sel[pos + 4] = GEN_INT (field + 4);
    }

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, sel));
})
11731
;; vpermil with an immediate, matched from a vec_select whose PARALLEL
;; is accepted by avx_vpermilp_<mode>_operand.  The immediate is
;; recovered from the PARALLEL at output time.
(define_insn "*avx_vpermilp<mode>"
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
        (vec_select:AVXMODEF2P
          (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
          (match_parallel 2 "avx_vpermilp_<mode>_operand"
            [(match_operand 3 "const_int_operand" "")])))]
  "TARGET_AVX"
{
  /* The immediate is the value returned by avx_vpermilp_parallel
     minus one.  */
  operands[2]
    = GEN_INT (avx_vpermilp_parallel (operands[2], <MODE>mode) - 1);
  return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
11749
;; vpermil with a variable selector: operand 2 is a register-or-memory
;; vector in <avxpermvecmode>; represented as UNSPEC_VPERMIL.
(define_insn "avx_vpermilvar<mode>3"
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
        (unspec:AVXMODEF2P
          [(match_operand:AVXMODEF2P 1 "register_operand" "x")
           (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
          UNSPEC_VPERMIL))]
  "TARGET_AVX"
  "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
11762
;; vperm2f128 expander.  When neither bit 3 nor bit 7 of the immediate
;; is set, the operation is emitted as a vec_select from the vec_concat
;; of the two sources; otherwise the unspec template is used as is.
(define_expand "avx_vperm2f128<mode>3"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
        (unspec:AVX256MODE2P
          [(match_operand:AVX256MODE2P 1 "register_operand" "")
           (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
           (match_operand:SI 3 "const_0_to_255_operand" "")]
          UNSPEC_VPERMIL2F128))]
  "TARGET_AVX"
{
  const int imm = INTVAL (operands[3]);

  if ((imm & 0x88) == 0)
    {
      const int nelt = <ssescalarnum>;
      const int half = nelt / 2;
      /* First element index of the 128-bit lane chosen for the low and
         for the high half of the result.  */
      const int lo_base = (imm & 3) * half;
      const int hi_base = ((imm >> 4) & 3) * half;
      rtx sel[<ssescalarnum>], both, order, pick;
      int k;

      for (k = 0; k < half; ++k)
        {
          sel[k] = GEN_INT (lo_base + k);
          sel[k + half] = GEN_INT (hi_base + k);
        }

      both = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
                                 operands[1], operands[2]);
      order = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, sel));
      pick = gen_rtx_VEC_SELECT (<MODE>mode, both, order);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], pick));
      DONE;
    }
})
11795
11796 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11797 ;; means that in order to represent this properly in rtl we'd have to
11798 ;; nest *another* vec_concat with a zero operand and do the select from
11799 ;; a 4x wide vector. That doesn't seem very nice.
;; vperm2f128 in its unspec form, accepting any 8-bit immediate.
(define_insn "*avx_vperm2f128<mode>_full"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
        (unspec:AVX256MODE2P
          [(match_operand:AVX256MODE2P 1 "register_operand" "x")
           (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_VPERMIL2F128))]
  "TARGET_AVX"
  "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11814
;; vperm2f128 matched from a vec_select over the vec_concat of the two
;; sources, with a PARALLEL accepted by avx_vperm2f128_<mode>_operand.
;; The immediate is recovered from the PARALLEL at output time.
(define_insn "*avx_vperm2f128<mode>_nozero"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
        (vec_select:AVX256MODE2P
          (vec_concat:<ssedoublesizemode>
            (match_operand:AVX256MODE2P 1 "register_operand" "x")
            (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
          (match_parallel 3 "avx_vperm2f128_<mode>_operand"
            [(match_operand 4 "const_int_operand" "")])))]
  "TARGET_AVX"
{
  /* The immediate is the value returned by avx_vperm2f128_parallel
     minus one.  */
  operands[3]
    = GEN_INT (avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1);
  return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11834
;; vinsertf128 expander: operand 3 (0 or 1) chooses between the
;; vec_set_lo_<mode> and vec_set_hi_<mode> patterns.
(define_expand "avx_vinsertf128<mode>"
  [(match_operand:AVX256MODE 0 "register_operand" "")
   (match_operand:AVX256MODE 1 "register_operand" "")
   (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_0_to_1_operand" "")]
  "TARGET_AVX"
{
  const HOST_WIDE_INT which_half = INTVAL (operands[3]);

  if (which_half == 0)
    emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
                                      operands[2]));
  else if (which_half == 1)
    emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
                                      operands[2]));
  else
    gcc_unreachable ();

  DONE;
})
11857
;; Replace the low half of a four-element 256-bit vector: the result is
;; operand 2 concatenated with elements 2..3 of operand 1.
(define_insn "vec_set_lo_<mode>"
  [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
        (vec_concat:AVX256MODE4P
          (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
          (vec_select:<avxhalfvecmode>
            (match_operand:AVX256MODE4P 1 "register_operand" "x")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11872
;; Replace the high half of a four-element 256-bit vector: the result
;; is elements 0..1 of operand 1 concatenated with operand 2.
(define_insn "vec_set_hi_<mode>"
  [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
        (vec_concat:AVX256MODE4P
          (vec_select:<avxhalfvecmode>
            (match_operand:AVX256MODE4P 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11887
;; Replace the low half of an eight-element 256-bit vector: the result
;; is operand 2 concatenated with elements 4..7 of operand 1.
(define_insn "vec_set_lo_<mode>"
  [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
        (vec_concat:AVX256MODE8P
          (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
          (vec_select:<avxhalfvecmode>
            (match_operand:AVX256MODE8P 1 "register_operand" "x")
            (parallel [(const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX"
  "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11903
;; Replace the high half of an eight-element 256-bit vector: the result
;; is elements 0..3 of operand 1 concatenated with operand 2.
(define_insn "vec_set_hi_<mode>"
  [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
        (vec_concat:AVX256MODE8P
          (vec_select:<avxhalfvecmode>
            (match_operand:AVX256MODE8P 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))
          (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11919
;; Replace the low half of a V16HI vector: the result is operand 2
;; concatenated with elements 8..15 of operand 1.
(define_insn "vec_set_lo_v16hi"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")
          (vec_select:V8HI
            (match_operand:V16HI 1 "register_operand" "x")
            (parallel [(const_int 8) (const_int 9)
                       (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13)
                       (const_int 14) (const_int 15)]))))]
  "TARGET_AVX"
  "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11937
;; Replace the last eight halfwords of a V16HI vector with operand 2.
;; The result is the concatenation of elements 0..7 selected from operand 1
;; with operand 2 (V8HI, register or memory).
;; Emitted as vinsertf128 with immediate 1; the "mode" attribute is V8SF
;; even though the operands are integer vectors.
11938 (define_insn "vec_set_hi_v16hi"
11939 [(set (match_operand:V16HI 0 "register_operand" "=x")
11940 (vec_concat:V16HI
11941 (vec_select:V8HI
11942 (match_operand:V16HI 1 "register_operand" "x")
11943 (parallel [(const_int 0) (const_int 1)
11944 (const_int 2) (const_int 3)
11945 (const_int 4) (const_int 5)
11946 (const_int 6) (const_int 7)]))
11947 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11948 "TARGET_AVX"
11949 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11950 [(set_attr "type" "sselog")
11951 (set_attr "prefix_extra" "1")
11952 (set_attr "length_immediate" "1")
11953 (set_attr "prefix" "vex")
11954 (set_attr "mode" "V8SF")])
11955
;; Replace the first sixteen bytes of a V32QI vector with operand 2.
;; The result is the concatenation of operand 2 (V16QI, register or memory)
;; with elements 16..31 selected from operand 1.
;; Emitted as vinsertf128 with immediate 0; the "mode" attribute is V8SF
;; even though the operands are integer vectors.
11956 (define_insn "vec_set_lo_v32qi"
11957 [(set (match_operand:V32QI 0 "register_operand" "=x")
11958 (vec_concat:V32QI
11959 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
11960 (vec_select:V16QI
11961 (match_operand:V32QI 1 "register_operand" "x")
11962 (parallel [(const_int 16) (const_int 17)
11963 (const_int 18) (const_int 19)
11964 (const_int 20) (const_int 21)
11965 (const_int 22) (const_int 23)
11966 (const_int 24) (const_int 25)
11967 (const_int 26) (const_int 27)
11968 (const_int 28) (const_int 29)
11969 (const_int 30) (const_int 31)]))))]
11970 "TARGET_AVX"
11971 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11972 [(set_attr "type" "sselog")
11973 (set_attr "prefix_extra" "1")
11974 (set_attr "length_immediate" "1")
11975 (set_attr "prefix" "vex")
11976 (set_attr "mode" "V8SF")])
11977
;; Replace the last sixteen bytes of a V32QI vector with operand 2.
;; The result is the concatenation of elements 0..15 selected from operand 1
;; with operand 2 (V16QI, register or memory).
;; Emitted as vinsertf128 with immediate 1; the "mode" attribute is V8SF
;; even though the operands are integer vectors.
11978 (define_insn "vec_set_hi_v32qi"
11979 [(set (match_operand:V32QI 0 "register_operand" "=x")
11980 (vec_concat:V32QI
11981 (vec_select:V16QI
11982 (match_operand:V32QI 1 "register_operand" "x")
11983 (parallel [(const_int 0) (const_int 1)
11984 (const_int 2) (const_int 3)
11985 (const_int 4) (const_int 5)
11986 (const_int 6) (const_int 7)
11987 (const_int 8) (const_int 9)
11988 (const_int 10) (const_int 11)
11989 (const_int 12) (const_int 13)
11990 (const_int 14) (const_int 15)]))
11991 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
11992 "TARGET_AVX"
11993 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11994 [(set_attr "type" "sselog")
11995 (set_attr "prefix_extra" "1")
11996 (set_attr "length_immediate" "1")
11997 (set_attr "prefix" "vex")
11998 (set_attr "mode" "V8SF")])
11999
;; Masked vector load, modelled as an opaque UNSPEC_MASKLOAD.
;; Operand 0: destination register.
;; Operand 1: memory source.
;; Operand 2: register operand (presumably the element mask, going by the
;;            vmaskmov mnemonic -- confirm against the ISA reference).
;; The (match_dup 0) makes the destination's previous contents an input of
;; the unspec as well.
;; AVXMODEF2P is defined outside this excerpt; the vmaskmov<ssemodesuffix>
;; template suggests packed float/double modes -- confirm.
12000 (define_insn "avx_maskload<ssemodesuffix><avxmodesuffix>"
12001 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
12002 (unspec:AVXMODEF2P
12003 [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
12004 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12005 (match_dup 0)]
12006 UNSPEC_MASKLOAD))]
12007 "TARGET_AVX"
12008 "vmaskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
12009 [(set_attr "type" "sselog1")
12010 (set_attr "prefix_extra" "1")
12011 (set_attr "prefix" "vex")
12012 (set_attr "mode" "<MODE>")])
12013
;; Masked vector store, modelled as an opaque UNSPEC_MASKSTORE.
;; Operand 0: memory destination.
;; Operands 1 and 2: register inputs.  In the AT&T template operand 2 is
;; printed first and operand 1 second; presumably operand 1 is the mask and
;; operand 2 the data -- confirm against the ISA reference and the builtin
;; expander.
;; The (match_dup 0) makes the previous memory contents an input of the
;; unspec as well.
12014 (define_insn "avx_maskstore<ssemodesuffix><avxmodesuffix>"
12015 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
12016 (unspec:AVXMODEF2P
12017 [(match_operand:AVXMODEF2P 1 "register_operand" "x")
12018 (match_operand:AVXMODEF2P 2 "register_operand" "x")
12019 (match_dup 0)]
12020 UNSPEC_MASKSTORE))]
12021 "TARGET_AVX"
12022 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12023 [(set_attr "type" "sselog1")
12024 (set_attr "prefix_extra" "1")
12025 (set_attr "prefix" "vex")
12026 (set_attr "mode" "<MODE>")])
12027
;; Cast a half-width vector (operand 1) to a 256-bit vector (operand 0),
;; represented as UNSPEC_CAST.  The insn has no assembly of its own ("#");
;; after reload it is split into a single full-width move.
;; Alternatives: register destination with register/memory source, or
;; memory destination with register source.
12028 (define_insn_and_split "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
12029 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12030 (unspec:AVX256MODE2P
12031 [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12032 UNSPEC_CAST))]
12033 "TARGET_AVX"
12034 "#"
12035 "&& reload_completed"
12036 [(const_int 0)]
12037 {
12038 rtx op1 = operands[1];
/* A register source is re-expressed as the same hard register number in
   the full-width mode.  */
12039 if (REG_P (op1))
12040 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12041 else
/* NOTE(review): for a non-register source this requests a <MODE>mode
   view of an operand whose own mode is only <avxhalfvecmode>; confirm
   that the resulting full-width access is intended.  */
12042 op1 = gen_lowpart (<MODE>mode, op1);
12043 emit_move_insn (operands[0], op1);
12044 DONE;
12045 })
12046
;; Vector initialisation expander for every mode in AVX256MODE.
;; Operand 0 is the destination register; operand 1 carries no predicate or
;; mode in the pattern.  All the work is delegated to
;; ix86_expand_vector_init, which is defined outside this file; the meaning
;; of its leading `false' argument is not visible here.
12047 (define_expand "vec_init<mode>"
12048 [(match_operand:AVX256MODE 0 "register_operand" "")
12049 (match_operand 1 "" "")]
12050 "TARGET_AVX"
12051 {
12052 ix86_expand_vector_init (false, operands[0], operands[1]);
12053 DONE;
12054 })
12055
;; Build a 256-bit vector by concatenating two half-width vectors.
;; Alternative 0: operand 2 is a register or memory; emitted as vinsertf128
;;   with immediate 1, with operand 1 printed through the %t modifier
;;   (presumably its 256-bit register name -- confirm in the operand
;;   printer).
;; Alternative 1: operand 2 satisfies constraint "C" (presumably a zero
;;   constant -- confirm in constraints.md); emitted as a 128-bit move of
;;   operand 1 into the %x view of the destination, with the mnemonic chosen
;;   from the insn's "mode" attribute.
12056 (define_insn "*vec_concat<mode>_avx"
12057 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
12058 (vec_concat:AVX256MODE
12059 (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
12060 (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
12061 "TARGET_AVX"
12062 {
12063 switch (which_alternative)
12064 {
12065 case 0:
12066 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12067 case 1:
/* Pick the move mnemonic that matches the insn's mode attribute; every
   mode other than V8SF and V4DF uses vmovdqa.  */
12068 switch (get_attr_mode (insn))
12069 {
12070 case MODE_V8SF:
12071 return "vmovaps\t{%1, %x0|%x0, %1}";
12072 case MODE_V4DF:
12073 return "vmovapd\t{%1, %x0|%x0, %1}";
12074 default:
12075 return "vmovdqa\t{%1, %x0|%x0, %1}";
12076 }
12077 default:
12078 gcc_unreachable ();
12079 }
12080 }
12081 [(set_attr "type" "sselog,ssemov")
12082 (set_attr "prefix_extra" "1,*")
12083 (set_attr "length_immediate" "1,*")
12084 (set_attr "prefix" "vex")
12085 (set_attr "mode" "<avxvecmode>")])
12086
;; Convert half-precision values held in a V8HI register to a V4SF result
;; (F16C vcvtph2ps, register source).
;; The conversion is modelled as a V8SF unspec of the whole V8HI input, of
;; which the first four elements (0..3) are selected as the result.  The
;; selection must list four distinct consecutive indices: a repeated index
;; would tell the RTL optimizers that one converted element is duplicated
;; and another is never used.
12087 (define_insn "vcvtph2ps"
12088 [(set (match_operand:V4SF 0 "register_operand" "=x")
12089 (vec_select:V4SF
12090 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12091 UNSPEC_VCVTPH2PS)
12092 (parallel [(const_int 0) (const_int 1)
12093 (const_int 2) (const_int 3)])))]
12094 "TARGET_F16C"
12095 "vcvtph2ps\t{%1, %0|%0, %1}"
12096 [(set_attr "type" "ssecvt")
12097 (set_attr "prefix" "vex")
12098 (set_attr "mode" "V4SF")])
12099
;; Memory-source form of the half-to-single conversion: a V4HI memory
;; operand is converted to a V4SF register result via UNSPEC_VCVTPH2PS.
;; Unnamed ("*") pattern, so it is only reached by pattern matching.
;; NOTE(review): the "mode" attribute is V8SF although the result is V4SF,
;; while the register-source "vcvtph2ps" pattern uses V4SF -- confirm
;; whether this is intentional.
12100 (define_insn "*vcvtph2ps_load"
12101 [(set (match_operand:V4SF 0 "register_operand" "=x")
12102 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12103 UNSPEC_VCVTPH2PS))]
12104 "TARGET_F16C"
12105 "vcvtph2ps\t{%1, %0|%0, %1}"
12106 [(set_attr "type" "ssecvt")
12107 (set_attr "prefix" "vex")
12108 (set_attr "mode" "V8SF")])
12109
;; 256-bit half-to-single conversion: a V8HI register or memory operand is
;; converted to a V8SF register result via UNSPEC_VCVTPH2PS.
12110 (define_insn "vcvtph2ps256"
12111 [(set (match_operand:V8SF 0 "register_operand" "=x")
12112 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12113 UNSPEC_VCVTPH2PS))]
12114 "TARGET_F16C"
12115 "vcvtph2ps\t{%1, %0|%0, %1}"
12116 [(set_attr "type" "ssecvt")
12117 (set_attr "prefix" "vex")
12118 (set_attr "mode" "V8SF")])
12119
;; Expander for the 128-bit single-to-half conversion.
;; Operand 1 is the V4SF source and operand 2 an SImode immediate passed
;; through to the unspec.  The V8HI result is the V4HI conversion result
;; concatenated with operand 3, which the preparation statement sets to the
;; V4HImode zero constant.
12120 (define_expand "vcvtps2ph"
12121 [(set (match_operand:V8HI 0 "register_operand" "")
12122 (vec_concat:V8HI
12123 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12124 (match_operand:SI 2 "immediate_operand" "")]
12125 UNSPEC_VCVTPS2PH)
12126 (match_dup 3)))]
12127 "TARGET_F16C"
12128 "operands[3] = CONST0_RTX (V4HImode);")
12129
;; Register-destination form of the 128-bit single-to-half conversion.
;; Matches the V4HI conversion result concatenated with a zero V4HI vector
;; (operand 3 must satisfy const0_operand), which is the shape the
;; "vcvtps2ph" expander generates.
;; Operand 2 is the immediate printed as the instruction's first AT&T
;; operand; constraint "N" presumably restricts it to an 8-bit unsigned
;; value -- confirm in constraints.md.
12130 (define_insn "*vcvtps2ph"
12131 [(set (match_operand:V8HI 0 "register_operand" "=x")
12132 (vec_concat:V8HI
12133 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12134 (match_operand:SI 2 "immediate_operand" "N")]
12135 UNSPEC_VCVTPS2PH)
12136 (match_operand:V4HI 3 "const0_operand" "")))]
12137 "TARGET_F16C"
12138 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12139 [(set_attr "type" "ssecvt")
12140 (set_attr "prefix" "vex")
12141 (set_attr "mode" "V4SF")])
12142
;; Memory-destination form of the 128-bit single-to-half conversion: the
;; V4HI conversion result of the V4SF register operand 1 is stored directly
;; to memory operand 0, with no zero half concatenated.
;; Operand 2 is the immediate passed to the instruction; constraint "N"
;; presumably restricts it to an 8-bit unsigned value -- confirm in
;; constraints.md.
12143 (define_insn "*vcvtps2ph_store"
12144 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12145 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12146 (match_operand:SI 2 "immediate_operand" "N")]
12147 UNSPEC_VCVTPS2PH))]
12148 "TARGET_F16C"
12149 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12150 [(set_attr "type" "ssecvt")
12151 (set_attr "prefix" "vex")
12152 (set_attr "mode" "V4SF")])
12153
;; 256-bit single-to-half conversion: the V8SF register operand 1 is
;; converted to a V8HI result that may live in a register or in memory.
;; Operand 2 is the immediate passed to the instruction; constraint "N"
;; presumably restricts it to an 8-bit unsigned value -- confirm in
;; constraints.md.
12154 (define_insn "vcvtps2ph256"
12155 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12156 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12157 (match_operand:SI 2 "immediate_operand" "N")]
12158 UNSPEC_VCVTPS2PH))]
12159 "TARGET_F16C"
12160 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12161 [(set_attr "type" "ssecvt")
12162 (set_attr "prefix" "vex")
12163 (set_attr "mode" "V8SF")])