re PR target/48860 (r173265 breaks bootstrap on x86_64-apple-darwin10)
[gcc.git] / gcc / config / i386 / sse.md
1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21 ;; All vector modes including V1TImode, used in move patterns.
;; An entry written as (MODE "COND") is only iterated when COND holds;
;; a bare mode name is iterated unconditionally.  Here every entry
;; guarded by TARGET_AVX is paired with an unguarded counterpart.
22 (define_mode_iterator V16
23 [(V32QI "TARGET_AVX") V16QI
24 (V16HI "TARGET_AVX") V8HI
25 (V8SI "TARGET_AVX") V4SI
26 (V4DI "TARGET_AVX") V2DI
27 V1TI
28 (V8SF "TARGET_AVX") V4SF
29 (V4DF "TARGET_AVX") V2DF])
30
31 ;; All vector modes
;; Same list as V16 minus V1TI; additionally V2DF is only iterated
;; when TARGET_SSE2 holds.
32 (define_mode_iterator V
33 [(V32QI "TARGET_AVX") V16QI
34 (V16HI "TARGET_AVX") V8HI
35 (V8SI "TARGET_AVX") V4SI
36 (V4DI "TARGET_AVX") V2DI
37 (V8SF "TARGET_AVX") V4SF
38 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
39
40 ;; All 128bit vector modes
;; V2DF is the only entry with a condition (TARGET_SSE2).
41 (define_mode_iterator V_128
42 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
43
44 ;; All 256bit vector modes
;; No per-mode conditions are attached here; patterns using this
;; iterator must supply their own enabling condition.
45 (define_mode_iterator V_256
46 [V32QI V16HI V8SI V4DI V8SF V4DF])
47
48 ;; All vector float modes
;; V8SF/V4DF require TARGET_AVX and V2DF requires TARGET_SSE2; V4SF is
;; unconditional.
49 (define_mode_iterator VF
50 [(V8SF "TARGET_AVX") V4SF
51 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
52
53 ;; All SFmode vector float modes
54 (define_mode_iterator VF1
55 [(V8SF "TARGET_AVX") V4SF])
56
57 ;; All DFmode vector float modes
;; Unlike VF, V2DF carries no TARGET_SSE2 guard here; the expanders in
;; this file that iterate over VF2 use "TARGET_SSE2" as their condition.
58 (define_mode_iterator VF2
59 [(V4DF "TARGET_AVX") V2DF])
60
61 ;; All 128bit vector float modes
62 (define_mode_iterator VF_128
63 [V4SF (V2DF "TARGET_SSE2")])
64
65 ;; All 256bit vector float modes
;; No per-mode conditions are attached to this iterator.
66 (define_mode_iterator VF_256
67 [V8SF V4DF])
68
69 ;; All vector integer modes
;; The first mode on each line requires TARGET_AVX; the second is
;; unconditional.
70 (define_mode_iterator VI
71 [(V32QI "TARGET_AVX") V16QI
72 (V16HI "TARGET_AVX") V8HI
73 (V8SI "TARGET_AVX") V4SI
74 (V4DI "TARGET_AVX") V2DI])
75
76 ;; All QImode vector integer modes
77 (define_mode_iterator VI1
78 [(V32QI "TARGET_AVX") V16QI])
79
80 ;; All DImode vector integer modes
81 (define_mode_iterator VI8
82 [(V4DI "TARGET_AVX") V2DI])
83
84 ;; All 128bit vector integer modes
85 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
86
87 ;; Random 128bit vector integer mode combinations
;; The digits in each name match the element sizes in bytes of the
;; listed modes (1 = QI, 2 = HI, 4 = SI, 8 = DI).
88 (define_mode_iterator VI12_128 [V16QI V8HI])
89 (define_mode_iterator VI14_128 [V16QI V4SI])
90 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
91 (define_mode_iterator VI24_128 [V8HI V4SI])
92 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
93
94 ;; Int-float size matches
;; Each iterator pairs an integer vector mode with the float vector mode
;; that has the same element count.
95 (define_mode_iterator VI4F_128 [V4SI V4SF])
96 (define_mode_iterator VI8F_128 [V2DI V2DF])
97 (define_mode_iterator VI4F_256 [V8SI V8SF])
98 (define_mode_iterator VI8F_256 [V4DI V4DF])
99
100 ;; Mapping from float mode to required SSE level
;; The <sse>, <sse2>, <sse3> and <sse4_1> attributes are substituted into
;; pattern names in this file (e.g. "<sse>_movnt<mode>"), so the strings
;; below become the ISA prefix of the generated pattern name.
101 (define_mode_attr sse
102 [(SF "sse") (DF "sse2")
103 (V4SF "sse") (V2DF "sse2")
104 (V8SF "avx") (V4DF "avx")])
105
;; Name prefix for the QImode/DImode integer vector modes.
106 (define_mode_attr sse2
107 [(V16QI "sse2") (V32QI "avx")
108 (V2DI "sse2") (V4DI "avx")])
109
;; Name prefix for the QImode integer vector modes.
110 (define_mode_attr sse3
111 [(V16QI "sse3") (V32QI "avx")])
112
;; Name prefix for the vector float modes.
113 (define_mode_attr sse4_1
114 [(V4SF "sse4_1") (V2DF "sse4_1")
115 (V8SF "avx") (V4DF "avx")])
116
;; Suffix appended to pattern names: "256" for the 32-byte modes and
;; the empty string for the 16-byte modes.
117 (define_mode_attr avxsizesuffix
118 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
119 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
120 (V8SF "256") (V4DF "256")
121 (V4SF "") (V2DF "")])
122
123 ;; SSE instruction mode
;; Value used for the "mode" insn attribute: integer vector modes map to
;; OI or TI, float vector modes map to themselves.
124 (define_mode_attr sseinsnmode
125 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
126 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
127 (V8SF "V8SF") (V4DF "V4DF")
128 (V4SF "V4SF") (V2DF "V2DF")])
129
130 ;; Mapping of vector float modes to an integer mode of the same size
;; Only float vector modes have entries.
131 (define_mode_attr sseintvecmode
132 [(V8SF "V8SI") (V4DF "V4DI")
133 (V4SF "V4SI") (V2DF "V2DI")])
134
135 ;; Mapping of vector modes to a vector mode of double size
;; The element type is kept and the element count is doubled.
136 (define_mode_attr ssedoublevecmode
137 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
138 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
139 (V8SF "V16SF") (V4DF "V8DF")
140 (V4SF "V8SF") (V2DF "V4DF")])
141
142 ;; Mapping of vector modes to a vector mode of half size
;; The element type is kept and the element count is halved.  There is
;; no entry for V2DI or V2DF.
143 (define_mode_attr ssehalfvecmode
144 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
145 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
146 (V8SF "V4SF") (V4DF "V2DF")
147 (V4SF "V2SF")])
148
149 ;; Mapping of vector modes back to the scalar modes
;; Used in this file as the "mode" attribute of the scalar (vm*) insns.
150 (define_mode_attr ssescalarmode
151 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
152 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
153 (V8SF "SF") (V4DF "DF")
154 (V4SF "SF") (V2DF "DF")])
155
156 ;; Number of scalar elements in each vector type
;; The values are strings because they are substituted textually.
157 (define_mode_attr ssescalarnum
158 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
159 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
160 (V8SF "8") (V4DF "4")
161 (V4SF "4") (V2DF "2")])
162
163 ;; SSE scalar suffix for vector modes
;; Appended to mnemonics by the vm* patterns in this file (for example
;; "mul<ssescalarmodesuffix>").  Note that V8SI and V4DI map to the
;; float suffixes "ss"/"sd" while V4SI maps to "d".
164 (define_mode_attr ssescalarmodesuffix
165 [(V8SF "ss") (V4DF "sd")
166 (V4SF "ss") (V2DF "sd")
167 (V8SI "ss") (V4DI "sd")
168 (V4SI "d")])
169
170 ;; Pack/unpack vector modes
;; sseunpackmode maps a mode to the mode with elements twice as wide and
;; half as many; ssepackmode is the inverse mapping.
171 (define_mode_attr sseunpackmode
172 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")])
173
174 (define_mode_attr ssepackmode
175 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")])
176
177 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one.
178 (define_mode_attr sserotatemax
179 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
180
181 ;; Mapping of immediate bits for blend instructions
;; Each value is 2^N - 1 where N is the number of elements in the mode,
;; i.e. a mask with one bit per element.
182 (define_mode_attr blendbits
183 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
184
185 ;; Instruction suffix for sign and zero extensions.
;; This is a code attribute (keyed on the RTL code), not a mode attribute.
186 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
187
188
189
190 ;; Mix-n-match
;; Ad-hoc selections of 256-bit modes; no per-mode conditions attached.
191 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
192 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
193
;; Scalar and vector float modes; no per-mode conditions attached.
194 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
195
196 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
197
198 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
199 ;;
200 ;; Move patterns
201 ;;
202 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
203
204 ;; All of these patterns are enabled for SSE1 as well as SSE2.
205 ;; This is essential for maintaining stable calling conventions.
206
;; Standard-named move expander for every mode in V16.  No RTL from the
;; template is emitted: the work is delegated to ix86_expand_vector_move
;; and the expander finishes with DONE.
207 (define_expand "mov<mode>"
208 [(set (match_operand:V16 0 "nonimmediate_operand" "")
209 (match_operand:V16 1 "nonimmediate_operand" ""))]
210 "TARGET_SSE"
211 {
212 ix86_expand_vector_move (<MODE>mode, operands);
213 DONE;
214 })
215
;; Vector move insn for every mode in V16.
;;
;; Insn condition: TARGET_SSE and at least one operand is a register.
;;
;; Alternatives:
;;   0  register <- SSE constant ("C"); the mnemonic comes from
;;      standard_sse_constant_opcode.
;;   1  register <- register or memory.
;;   2  memory   <- register.
;;
;; For alternatives 1 and 2 the mnemonic is selected from the insn's
;; "mode" attribute.  When TARGET_AVX is set and either operand satisfies
;; misaligned_operand, the unaligned vmovups/vmovupd/vmovdqu form is
;; returned; otherwise an aligned form is used, with the V4DF/V2DF and
;; OI/TI cases falling back to movaps when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.
;;
;; The "mode" attribute is computed as follows (first match wins):
;;   - TARGET_AVX: <sseinsnmode>;
;;   - optimizing the function for size, or !TARGET_SSE2, or a store
;;     (alternative 2) with TARGET_SSE_TYPELESS_STORES: V4SF;
;;   - operand mode V4SF: V4SF;
;;   - operand mode V2DF: V2DF;
;;   - otherwise: TI.
216 (define_insn "*mov<mode>_internal"
217 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
218 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
219 "TARGET_SSE
220 && (register_operand (operands[0], <MODE>mode)
221 || register_operand (operands[1], <MODE>mode))"
222 {
223 switch (which_alternative)
224 {
225 case 0:
226 return standard_sse_constant_opcode (insn, operands[1]);
227 case 1:
228 case 2:
229 switch (get_attr_mode (insn))
230 {
231 case MODE_V8SF:
232 case MODE_V4SF:
233 if (TARGET_AVX
234 && (misaligned_operand (operands[0], <MODE>mode)
235 || misaligned_operand (operands[1], <MODE>mode)))
236 return "vmovups\t{%1, %0|%0, %1}";
237 else
238 return "%vmovaps\t{%1, %0|%0, %1}";
239
240 case MODE_V4DF:
241 case MODE_V2DF:
242 if (TARGET_AVX
243 && (misaligned_operand (operands[0], <MODE>mode)
244 || misaligned_operand (operands[1], <MODE>mode)))
245 return "vmovupd\t{%1, %0|%0, %1}";
246 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
247 return "%vmovaps\t{%1, %0|%0, %1}";
248 else
249 return "%vmovapd\t{%1, %0|%0, %1}";
250
251 case MODE_OI:
252 case MODE_TI:
253 if (TARGET_AVX
254 && (misaligned_operand (operands[0], <MODE>mode)
255 || misaligned_operand (operands[1], <MODE>mode)))
256 return "vmovdqu\t{%1, %0|%0, %1}";
257 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
258 return "%vmovaps\t{%1, %0|%0, %1}";
259 else
260 return "%vmovdqa\t{%1, %0|%0, %1}";
261
262 default:
263 gcc_unreachable ();
264 }
265 default:
266 gcc_unreachable ();
267 }
268 }
269 [(set_attr "type" "sselog1,ssemov,ssemov")
270 (set_attr "prefix" "maybe_vex")
271 (set (attr "mode")
272 (cond [(ne (symbol_ref "TARGET_AVX") (const_int 0))
273 (const_string "<sseinsnmode>")
274 (ior (ior
275 (ne (symbol_ref "optimize_function_for_size_p (cfun)")
276 (const_int 0))
277 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
278 (and (eq_attr "alternative" "2")
279 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
280 (const_int 0))))
281 (const_string "V4SF")
282 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
283 (const_string "V4SF")
284 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
285 (const_string "V2DF")
286 ]
287 (const_string "TI")))])
288
;; Build a V2DI whose first element is element 0 of operand 1 and whose
;; second element is constant zero.  Operand 1 may be a register or
;; memory; the result is always a register.  Emitted as movq, with the
;; "%v" template prefix and "maybe_vex" attribute.
289 (define_insn "sse2_movq128"
290 [(set (match_operand:V2DI 0 "register_operand" "=x")
291 (vec_concat:V2DI
292 (vec_select:DI
293 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
294 (parallel [(const_int 0)]))
295 (const_int 0)))]
296 "TARGET_SSE2"
297 "%vmovq\t{%1, %0|%0, %1}"
298 [(set_attr "type" "ssemov")
299 (set_attr "prefix" "maybe_vex")
300 (set_attr "mode" "TI")])
301
302 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
303 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
304 ;; from memory, we'd prefer to load the memory directly into the %xmm
305 ;; register. To facilitate this happy circumstance, this pattern won't
306 ;; split until after register allocation. If the 64-bit value didn't
307 ;; come from memory, this is the best we can do. This is much better
308 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
309 ;; from there.
310
;; Operands:
;;   0  V4SI destination register.
;;   1  DImode source, either a general register ("r") or memory ("m").
;;   2  V4SI scratch; an early-clobber SSE register for the register
;;      alternative, unused ("X") for the memory alternative.
;;
;; The insn is only available for 32-bit targets with SSE2 and
;; TARGET_INTER_UNIT_MOVES.  It always outputs "#" and is split once
;; reload has completed:
;;   - register source: the SImode subregs at byte offsets 0 and 4 are
;;     loaded into operand 0 and the scratch with sse2_loadld, then
;;     combined with vec_interleave_lowv4si;
;;   - memory source: a single vec_concatv2di with zero, writing
;;     operand 0 viewed as V2DI.
311 (define_insn_and_split "movdi_to_sse"
312 [(parallel
313 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
314 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
315 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
316 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
317 "#"
318 "&& reload_completed"
319 [(const_int 0)]
320 {
321 if (register_operand (operands[1], DImode))
322 {
323 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
324 Assemble the 64-bit DImode value in an xmm register. */
325 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
326 gen_rtx_SUBREG (SImode, operands[1], 0)));
327 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
328 gen_rtx_SUBREG (SImode, operands[1], 4)));
329 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
330 operands[2]));
331 }
332 else if (memory_operand (operands[1], DImode))
333 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
334 operands[1], const0_rtx));
335 else
336 gcc_unreachable ();
337 })
338
;; After reload, rewrite a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand into a vec_merge (mask 1) of the
;; duplicated SFmode scalar with an all-zero V4SF vector.  Operand 1 is
;; replaced by its SFmode subreg at offset 0 and operand 2 is set to the
;; V4SF zero constant.
339 (define_split
340 [(set (match_operand:V4SF 0 "register_operand" "")
341 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
342 "TARGET_SSE && reload_completed"
343 [(set (match_dup 0)
344 (vec_merge:V4SF
345 (vec_duplicate:V4SF (match_dup 1))
346 (match_dup 2)
347 (const_int 1)))]
348 {
349 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
350 operands[2] = CONST0_RTX (V4SFmode);
351 })
352
;; V2DF counterpart of the V4SF split: after reload, a load whose source
;; satisfies zero_extended_scalar_load_operand becomes a vec_concat of
;; the DFmode scalar (subreg of operand 1 at offset 0) with the DFmode
;; zero constant.
353 (define_split
354 [(set (match_operand:V2DF 0 "register_operand" "")
355 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
356 "TARGET_SSE2 && reload_completed"
357 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
358 {
359 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
360 operands[2] = CONST0_RTX (DFmode);
361 })
362
;; Push expander for every mode in V16.  The register operand is handed
;; to ix86_expand_push, which emits the actual code; the template itself
;; is never emitted (DONE).
363 (define_expand "push<mode>1"
364 [(match_operand:V16 0 "register_operand" "")]
365 "TARGET_SSE"
366 {
367 ix86_expand_push (<MODE>mode, operands[0]);
368 DONE;
369 })
370
;; Misaligned move expander for every mode in V16.  Delegates entirely
;; to ix86_expand_vector_move_misalign; the template is never emitted.
371 (define_expand "movmisalign<mode>"
372 [(set (match_operand:V16 0 "nonimmediate_operand" "")
373 (match_operand:V16 1 "nonimmediate_operand" ""))]
374 "TARGET_SSE"
375 {
376 ix86_expand_vector_move_misalign (<MODE>mode, operands);
377 DONE;
378 })
379
;; Named expander for the unaligned float vector move, represented as an
;; UNSPEC_MOVU wrapper around the source.  If both operands are memory,
;; the source is first forced into a register so that the matching insn
;; (which rejects memory-to-memory) can be used.
380 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
381 [(set (match_operand:VF 0 "nonimmediate_operand" "")
382 (unspec:VF
383 [(match_operand:VF 1 "nonimmediate_operand" "")]
384 UNSPEC_MOVU))]
385 "TARGET_SSE"
386 {
387 if (MEM_P (operands[0]) && MEM_P (operands[1]))
388 operands[1] = force_reg (<MODE>mode, operands[1]);
389 })
390
;; Insn for the unaligned float vector move (UNSPEC_MOVU).  Two
;; alternatives: load into a register from register/memory, or store a
;; register to memory.  The condition rejects the memory-to-memory
;; combination.  The "movu" attribute is set to 1.
391 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
392 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
393 (unspec:VF
394 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
395 UNSPEC_MOVU))]
396 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
397 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
398 [(set_attr "type" "ssemov")
399 (set_attr "movu" "1")
400 (set_attr "prefix" "maybe_vex")
401 (set_attr "mode" "<MODE>")])
402
;; Named expander for the unaligned integer vector move over the QImode
;; vector modes (VI1), represented as UNSPEC_MOVU.  A memory-to-memory
;; request has its source forced into a register first.
403 (define_expand "<sse2>_movdqu<avxsizesuffix>"
404 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
405 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
406 UNSPEC_MOVU))]
407 "TARGET_SSE2"
408 {
409 if (MEM_P (operands[0]) && MEM_P (operands[1]))
410 operands[1] = force_reg (<MODE>mode, operands[1]);
411 })
412
;; Insn for the unaligned integer vector move (movdqu).  Alternatives:
;; register <- register/memory, and memory <- register; memory-to-memory
;; is excluded by the condition.  The "prefix_data16" attribute is left
;; at its default ("*") under TARGET_AVX and forced to "1" otherwise.
413 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
414 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
415 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
416 UNSPEC_MOVU))]
417 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
418 "%vmovdqu\t{%1, %0|%0, %1}"
419 [(set_attr "type" "ssemov")
420 (set_attr "movu" "1")
421 (set (attr "prefix_data16")
422 (if_then_else
423 (ne (symbol_ref "TARGET_AVX") (const_int 0))
424 (const_string "*")
425 (const_string "1")))
426 (set_attr "prefix" "maybe_vex")
427 (set_attr "mode" "<sseinsnmode>")])
428
;; lddqu: load a QImode-element vector from memory into a register,
;; represented as UNSPEC_LDDQU.  The source must be memory.  Outside
;; TARGET_AVX the encoding attributes are forced to prefix_data16 = 0
;; and prefix_rep = 1; under TARGET_AVX both are left at their defaults.
429 (define_insn "<sse3>_lddqu<avxsizesuffix>"
430 [(set (match_operand:VI1 0 "register_operand" "=x")
431 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
432 UNSPEC_LDDQU))]
433 "TARGET_SSE3"
434 "%vlddqu\t{%1, %0|%0, %1}"
435 [(set_attr "type" "ssemov")
436 (set_attr "movu" "1")
437 (set (attr "prefix_data16")
438 (if_then_else
439 (ne (symbol_ref "TARGET_AVX") (const_int 0))
440 (const_string "*")
441 (const_string "0")))
442 (set (attr "prefix_rep")
443 (if_then_else
444 (ne (symbol_ref "TARGET_AVX") (const_int 0))
445 (const_string "*")
446 (const_string "1")))
447 (set_attr "prefix" "maybe_vex")
448 (set_attr "mode" "<sseinsnmode>")])
449
;; movnti: store an SImode general register to memory, represented as
;; UNSPEC_MOVNT.  There is no "%v" prefix or "prefix" attribute on this
;; pattern, and prefix_data16 is forced to "0".
;; NOTE(review): the "mode" attribute is "V2DF" although both operands
;; are SImode -- confirm whether this is intentional.
450 (define_insn "sse2_movntsi"
451 [(set (match_operand:SI 0 "memory_operand" "=m")
452 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
453 UNSPEC_MOVNT))]
454 "TARGET_SSE2"
455 "movnti\t{%1, %0|%0, %1}"
456 [(set_attr "type" "ssemov")
457 (set_attr "prefix_data16" "0")
458 (set_attr "mode" "V2DF")])
459
;; Store of a float vector register to memory represented as
;; UNSPEC_MOVNT, for every mode in VF.  The mnemonic is "movnt" followed
;; by <ssemodesuffix>.
460 (define_insn "<sse>_movnt<mode>"
461 [(set (match_operand:VF 0 "memory_operand" "=m")
462 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
463 UNSPEC_MOVNT))]
464 "TARGET_SSE"
465 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
466 [(set_attr "type" "ssemov")
467 (set_attr "prefix" "maybe_vex")
468 (set_attr "mode" "<MODE>")])
469
;; movntdq: store of a DImode-element vector register to memory
;; represented as UNSPEC_MOVNT, for the modes in VI8.  prefix_data16 is
;; forced to "1" unless TARGET_AVX is set.
;; NOTE(review): "type" is "ssecvt" here while the other movnt patterns
;; in this file use "ssemov" -- confirm whether this is intentional.
470 (define_insn "<sse2>_movnt<mode>"
471 [(set (match_operand:VI8 0 "memory_operand" "=m")
472 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
473 UNSPEC_MOVNT))]
474 "TARGET_SSE2"
475 "%vmovntdq\t{%1, %0|%0, %1}"
476 [(set_attr "type" "ssecvt")
477 (set (attr "prefix_data16")
478 (if_then_else
479 (ne (symbol_ref "TARGET_AVX") (const_int 0))
480 (const_string "*")
481 (const_string "1")))
482 (set_attr "prefix" "maybe_vex")
483 (set_attr "mode" "<sseinsnmode>")])
484
485 ; Expand patterns for non-temporal stores. At the moment, only those
486 ; that directly map to insns are defined; it would be possible to
487 ; define patterns for other modes that would expand to several insns.
488
489 ;; Modes handled by storent patterns.
;; Every mode except V4SF carries its own enabling condition.
490 (define_mode_iterator STORENT_MODE
491 [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
492 (V2DI "TARGET_SSE2")
493 (V8SF "TARGET_AVX") V4SF
494 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
495
;; The expander has no preparation code: it emits the UNSPEC_MOVNT set
;; from its template as-is, to be matched by a movnt insn for the mode.
;; The overall condition is TARGET_SSE, combined with the per-mode
;; conditions of STORENT_MODE.
496 (define_expand "storent<mode>"
497 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
498 (unspec:STORENT_MODE
499 [(match_operand:STORENT_MODE 1 "register_operand" "")]
500 UNSPEC_MOVNT))]
501 "TARGET_SSE")
502
503 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
504 ;;
505 ;; Parallel floating point arithmetic
506 ;;
507 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
508
;; Expander for the codes in the absneg code iterator on float vector
;; modes.  All work is delegated to ix86_expand_fp_absneg_operator; the
;; template is never emitted (DONE).
509 (define_expand "<code><mode>2"
510 [(set (match_operand:VF 0 "register_operand" "")
511 (absneg:VF
512 (match_operand:VF 1 "register_operand" "")))]
513 "TARGET_SSE"
514 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
515
;; Float vector abs/neg, kept as a single insn until after reload and
;; then split into one bitwise operation.
;;
;; Operands:
;;   0  destination register.
;;   1  value to negate / take the absolute value of.
;;   2  second input of the bitwise operation, carried in a (use).
;;   3  the operator itself (matched by absneg_operator).
;;
;; The insn always outputs "#".  After reload it is replaced by
;;   operand 0 = op1 XOR op2   when operand 3 is NEG,
;;   operand 0 = op1 AND op2   otherwise.
;;
;; Choice of op1/op2:
;;   - TARGET_AVX: if operand 1 is in memory the two inputs are swapped
;;     so that the memory operand comes second.
;;   - otherwise: op1 is the destination, and op2 is whichever input is
;;     not the destination.
;;
;; The split condition starts with "&&" so that it is combined with the
;; insn condition (TARGET_SSE), matching the other define_insn_and_split
;; in this file.
516 (define_insn_and_split "*absneg<mode>2"
517 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
518 (match_operator:VF 3 "absneg_operator"
519 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
520 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
521 "TARGET_SSE"
522 "#"
523 "&& reload_completed"
524 [(const_int 0)]
525 {
526 enum rtx_code absneg_op;
527 rtx op1, op2;
528 rtx t;
529
530 if (TARGET_AVX)
531 {
532 if (MEM_P (operands[1]))
533 op1 = operands[2], op2 = operands[1];
534 else
535 op1 = operands[1], op2 = operands[2];
536 }
537 else
538 {
539 op1 = operands[0];
540 if (rtx_equal_p (operands[0], operands[1]))
541 op2 = operands[2];
542 else
543 op2 = operands[1];
544 }
545
546 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
547 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
548 t = gen_rtx_SET (VOIDmode, operands[0], t);
549 emit_insn (t);
550 DONE;
551 }
552 [(set_attr "isa" "noavx,noavx,avx,avx")])
553
;; Expander for float vector add/subtract.  The operands are first
;; passed through ix86_fixup_binary_operands_no_copy; the template is
;; then emitted (there is no DONE).
554 (define_expand "<plusminus_insn><mode>3"
555 [(set (match_operand:VF 0 "register_operand" "")
556 (plusminus:VF
557 (match_operand:VF 1 "nonimmediate_operand" "")
558 (match_operand:VF 2 "nonimmediate_operand" "")))]
559 "TARGET_SSE"
560 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
561
;; Float vector add/subtract insn.  Operands must also satisfy
;; ix86_binary_operator_ok.
;;   Alternative 0 (isa noavx): two-operand form, operand 1 tied to the
;;     destination (the <comm> attribute supplies the commutativity
;;     marker in the constraint).
;;   Alternative 1 (isa avx): three-operand "v"-prefixed form.
562 (define_insn "*<plusminus_insn><mode>3"
563 [(set (match_operand:VF 0 "register_operand" "=x,x")
564 (plusminus:VF
565 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
566 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
567 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
568 "@
569 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
570 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
571 [(set_attr "isa" "noavx,avx")
572 (set_attr "type" "sseadd")
573 (set_attr "prefix" "orig,vex")
574 (set_attr "mode" "<MODE>")])
575
;; Scalar add/subtract on 128-bit float vectors: the vec_merge with mask
;; 1 takes element 0 from the add/subtract result and the remaining
;; elements from operand 1.  Uses the scalar mnemonic suffix and the
;; scalar "mode" attribute.  Alternative 0 is the two-operand form,
;; alternative 1 the three-operand AVX form.
576 (define_insn "<sse>_vm<plusminus_insn><mode>3"
577 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
578 (vec_merge:VF_128
579 (plusminus:VF_128
580 (match_operand:VF_128 1 "register_operand" "0,x")
581 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
582 (match_dup 1)
583 (const_int 1)))]
584 "TARGET_SSE"
585 "@
586 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
587 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
588 [(set_attr "isa" "noavx,avx")
589 (set_attr "type" "sseadd")
590 (set_attr "prefix" "orig,vex")
591 (set_attr "mode" "<ssescalarmode>")])
592
;; Expander for float vector multiply.  The operands are first passed
;; through ix86_fixup_binary_operands_no_copy; the template is then
;; emitted.
593 (define_expand "mul<mode>3"
594 [(set (match_operand:VF 0 "register_operand" "")
595 (mult:VF
596 (match_operand:VF 1 "nonimmediate_operand" "")
597 (match_operand:VF 2 "nonimmediate_operand" "")))]
598 "TARGET_SSE"
599 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
600
;; Float vector multiply insn.  Operand 1 is marked commutative ("%");
;; operands must also satisfy ix86_binary_operator_ok.  Alternative 0 is
;; the two-operand form with operand 1 tied to the destination,
;; alternative 1 the three-operand AVX form.
601 (define_insn "*mul<mode>3"
602 [(set (match_operand:VF 0 "register_operand" "=x,x")
603 (mult:VF
604 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
605 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
606 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
607 "@
608 mul<ssemodesuffix>\t{%2, %0|%0, %2}
609 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
610 [(set_attr "isa" "noavx,avx")
611 (set_attr "type" "ssemul")
612 (set_attr "prefix" "orig,vex")
613 (set_attr "mode" "<MODE>")])
614
;; Scalar multiply on 128-bit float vectors: element 0 comes from the
;; product, the remaining elements from operand 1 (vec_merge mask 1).
;; Operand 1 must be a register and is not marked commutative.
615 (define_insn "<sse>_vmmul<mode>3"
616 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
617 (vec_merge:VF_128
618 (mult:VF_128
619 (match_operand:VF_128 1 "register_operand" "0,x")
620 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
621 (match_dup 1)
622 (const_int 1)))]
623 "TARGET_SSE"
624 "@
625 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
626 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
627 [(set_attr "isa" "noavx,avx")
628 (set_attr "type" "ssemul")
629 (set_attr "prefix" "orig,vex")
630 (set_attr "mode" "<ssescalarmode>")])
631
;; Division expander for the DFmode vector modes (VF2), enabled by
;; TARGET_SSE2.  After the operand fixup the template is emitted; unlike
;; the SFmode variant there is no reciprocal-based alternative here.
632 (define_expand "div<mode>3"
633 [(set (match_operand:VF2 0 "register_operand" "")
634 (div:VF2 (match_operand:VF2 1 "register_operand" "")
635 (match_operand:VF2 2 "nonimmediate_operand" "")))]
636 "TARGET_SSE2"
637 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
638
;; Division expander for the SFmode vector modes (VF1).
;;
;; After the operand fixup, the division is emitted through
;; ix86_emit_swdivsf instead of the template when all of the following
;; hold: TARGET_SSE_MATH, TARGET_RECIP, not optimizing the insn for
;; size, flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.  Otherwise the plain div template is
;; emitted.
639 (define_expand "div<mode>3"
640 [(set (match_operand:VF1 0 "register_operand" "")
641 (div:VF1 (match_operand:VF1 1 "register_operand" "")
642 (match_operand:VF1 2 "nonimmediate_operand" "")))]
643 "TARGET_SSE"
644 {
645 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
646
647 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
648 && flag_finite_math_only && !flag_trapping_math
649 && flag_unsafe_math_optimizations)
650 {
651 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
652 DONE;
653 }
654 })
655
;; Float vector divide insn.  The dividend (operand 1) must be a
;; register; the divisor may be a register or memory.  Alternative 0 is
;; the two-operand form with operand 1 tied to the destination,
;; alternative 1 the three-operand AVX form.
656 (define_insn "<sse>_div<mode>3"
657 [(set (match_operand:VF 0 "register_operand" "=x,x")
658 (div:VF
659 (match_operand:VF 1 "register_operand" "0,x")
660 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
661 "TARGET_SSE"
662 "@
663 div<ssemodesuffix>\t{%2, %0|%0, %2}
664 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
665 [(set_attr "isa" "noavx,avx")
666 (set_attr "type" "ssediv")
667 (set_attr "prefix" "orig,vex")
668 (set_attr "mode" "<MODE>")])
669
;; Scalar divide on 128-bit float vectors: element 0 comes from the
;; quotient, the remaining elements from operand 1 (vec_merge mask 1).
670 (define_insn "<sse>_vmdiv<mode>3"
671 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
672 (vec_merge:VF_128
673 (div:VF_128
674 (match_operand:VF_128 1 "register_operand" "0,x")
675 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
676 (match_dup 1)
677 (const_int 1)))]
678 "TARGET_SSE"
679 "@
680 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
681 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
682 [(set_attr "isa" "noavx,avx")
683 (set_attr "type" "ssediv")
684 (set_attr "prefix" "orig,vex")
685 (set_attr "mode" "<ssescalarmode>")])
686
;; rcpps on the SFmode vector modes, represented as UNSPEC_RCP.  The
;; source may be a register or memory.  The mnemonic is fixed to "rcpps"
;; because VF1 only contains SFmode-element modes.
687 (define_insn "<sse>_rcp<mode>2"
688 [(set (match_operand:VF1 0 "register_operand" "=x")
689 (unspec:VF1
690 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
691 "TARGET_SSE"
692 "%vrcpps\t{%1, %0|%0, %1}"
693 [(set_attr "type" "sse")
694 (set_attr "atom_sse_attr" "rcp")
695 (set_attr "prefix" "maybe_vex")
696 (set_attr "mode" "<MODE>")])
697
;; rcpss: element 0 of the result is the UNSPEC_RCP of operand 1, the
;; remaining elements are taken from operand 2 (vec_merge mask 1).
;; Alternative 0 ties operand 2 to the destination; alternative 1 is the
;; three-operand AVX form with operand 2 as the extra source.
698 (define_insn "sse_vmrcpv4sf2"
699 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
700 (vec_merge:V4SF
701 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
702 UNSPEC_RCP)
703 (match_operand:V4SF 2 "register_operand" "0,x")
704 (const_int 1)))]
705 "TARGET_SSE"
706 "@
707 rcpss\t{%1, %0|%0, %1}
708 vrcpss\t{%1, %2, %0|%0, %2, %1}"
709 [(set_attr "isa" "noavx,avx")
710 (set_attr "type" "sse")
711 (set_attr "atom_sse_attr" "rcp")
712 (set_attr "prefix" "orig,vex")
713 (set_attr "mode" "SF")])
714
;; Square-root expander for the DFmode vector modes (VF2), enabled by
;; TARGET_SSE2.  There is no preparation code; the template is emitted
;; unchanged.
715 (define_expand "sqrt<mode>2"
716 [(set (match_operand:VF2 0 "register_operand" "")
717 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
718 "TARGET_SSE2")
719
;; Square-root expander for the SFmode vector modes (VF1).
;;
;; The square root is emitted through ix86_emit_swsqrtsf (last argument
;; false) instead of the template when all of the following hold:
;; TARGET_SSE_MATH, TARGET_RECIP, not optimizing the insn for size,
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.  Otherwise the plain sqrt template is
;; emitted.
720 (define_expand "sqrt<mode>2"
721 [(set (match_operand:VF1 0 "register_operand" "")
722 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
723 "TARGET_SSE"
724 {
725 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
726 && flag_finite_math_only && !flag_trapping_math
727 && flag_unsafe_math_optimizations)
728 {
729 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
730 DONE;
731 }
732 })
733
;; Float vector square-root insn for every mode in VF.  Single
;; alternative: register destination, register or memory source.
734 (define_insn "<sse>_sqrt<mode>2"
735 [(set (match_operand:VF 0 "register_operand" "=x")
736 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
737 "TARGET_SSE"
738 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
739 [(set_attr "type" "sse")
740 (set_attr "atom_sse_attr" "sqrt")
741 (set_attr "prefix" "maybe_vex")
742 (set_attr "mode" "<MODE>")])
743
;; Scalar square root on 128-bit float vectors: element 0 is the square
;; root of operand 1, the remaining elements come from operand 2
;; (vec_merge mask 1).  Alternative 0 ties operand 2 to the destination;
;; alternative 1 is the three-operand AVX form.
744 (define_insn "<sse>_vmsqrt<mode>2"
745 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
746 (vec_merge:VF_128
747 (sqrt:VF_128
748 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
749 (match_operand:VF_128 2 "register_operand" "0,x")
750 (const_int 1)))]
751 "TARGET_SSE"
752 "@
753 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
754 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
755 [(set_attr "isa" "noavx,avx")
756 (set_attr "type" "sse")
757 (set_attr "atom_sse_attr" "sqrt")
758 (set_attr "prefix" "orig,vex")
759 (set_attr "mode" "<ssescalarmode>")])
760
;; Reciprocal-square-root expander for the SFmode vector modes, enabled
;; by TARGET_SSE_MATH.  It always expands through ix86_emit_swsqrtsf
;; (last argument true); the UNSPEC_RSQRT template is never emitted.
761 (define_expand "rsqrt<mode>2"
762 [(set (match_operand:VF1 0 "register_operand" "")
763 (unspec:VF1
764 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
765 "TARGET_SSE_MATH"
766 {
767 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
768 DONE;
769 })
770
;; rsqrtps on the SFmode vector modes, represented as UNSPEC_RSQRT.
;; Single alternative: register destination, register or memory source.
771 (define_insn "<sse>_rsqrt<mode>2"
772 [(set (match_operand:VF1 0 "register_operand" "=x")
773 (unspec:VF1
774 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
775 "TARGET_SSE"
776 "%vrsqrtps\t{%1, %0|%0, %1}"
777 [(set_attr "type" "sse")
778 (set_attr "prefix" "maybe_vex")
779 (set_attr "mode" "<MODE>")])
780
;; rsqrtss: element 0 of the result is the UNSPEC_RSQRT of operand 1,
;; the remaining elements are taken from operand 2 (vec_merge mask 1).
;; Alternative 0 ties operand 2 to the destination; alternative 1 is the
;; three-operand AVX form.
781 (define_insn "sse_vmrsqrtv4sf2"
782 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
783 (vec_merge:V4SF
784 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
785 UNSPEC_RSQRT)
786 (match_operand:V4SF 2 "register_operand" "0,x")
787 (const_int 1)))]
788 "TARGET_SSE"
789 "@
790 rsqrtss\t{%1, %0|%0, %1}
791 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
792 [(set_attr "isa" "noavx,avx")
793 (set_attr "type" "sse")
794 (set_attr "prefix" "orig,vex")
795 (set_attr "mode" "SF")])
796
797 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
798 ;; isn't really correct, as those rtl operators aren't defined when
799 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
800
;; Expander for float vector smin/smax.  When flag_finite_math_only is
;; off, operand 1 is forced into a register before the operand fixup,
;; which matches the register_operand predicate required by the
;; non-finite insn pattern below.
801 (define_expand "<code><mode>3"
802 [(set (match_operand:VF 0 "register_operand" "")
803 (smaxmin:VF
804 (match_operand:VF 1 "nonimmediate_operand" "")
805 (match_operand:VF 2 "nonimmediate_operand" "")))]
806 "TARGET_SSE"
807 {
808 if (!flag_finite_math_only)
809 operands[1] = force_reg (<MODE>mode, operands[1]);
810 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
811 })
812
;; Float vector min/max insn used only when flag_finite_math_only is
;; set.  In this variant operand 1 is marked commutative ("%") and may
;; be memory; the operands must also satisfy ix86_binary_operator_ok.
813 (define_insn "*<code><mode>3_finite"
814 [(set (match_operand:VF 0 "register_operand" "=x,x")
815 (smaxmin:VF
816 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
817 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
818 "TARGET_SSE && flag_finite_math_only
819 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
820 "@
821 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
822 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
823 [(set_attr "isa" "noavx,avx")
824 (set_attr "type" "sseadd")
825 (set_attr "prefix" "orig,vex")
826 (set_attr "mode" "<MODE>")])
827
;; Float vector min/max insn used when flag_finite_math_only is not set.
;; Unlike the _finite variant, operand 1 must be a register and carries
;; no commutativity marker, so the operand order is preserved.
828 (define_insn "*<code><mode>3"
829 [(set (match_operand:VF 0 "register_operand" "=x,x")
830 (smaxmin:VF
831 (match_operand:VF 1 "register_operand" "0,x")
832 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
833 "TARGET_SSE && !flag_finite_math_only"
834 "@
835 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
836 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
837 [(set_attr "isa" "noavx,avx")
838 (set_attr "type" "sseadd")
839 (set_attr "prefix" "orig,vex")
840 (set_attr "mode" "<MODE>")])
841
;; Scalar min/max on 128-bit float vectors: element 0 comes from the
;; min/max result, the remaining elements from operand 1 (vec_merge
;; mask 1).
;; NOTE(review): "type" is "sse" here while the full-vector min/max
;; patterns in this file use "sseadd" -- confirm whether this is
;; intentional.
842 (define_insn "<sse>_vm<code><mode>3"
843 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
844 (vec_merge:VF_128
845 (smaxmin:VF_128
846 (match_operand:VF_128 1 "register_operand" "0,x")
847 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
848 (match_dup 1)
849 (const_int 1)))]
850 "TARGET_SSE"
851 "@
852 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
853 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
854 [(set_attr "isa" "noavx,avx")
855 (set_attr "type" "sse")
856 (set_attr "prefix" "orig,vex")
857 (set_attr "mode" "<ssescalarmode>")])
858
859 ;; These versions of the min/max patterns implement exactly the operations
860 ;; min = (op1 < op2 ? op1 : op2)
861 ;; max = (!(op1 < op2) ? op1 : op2)
862 ;; Their operands are not commutative, and thus they may be used in the
863 ;; presence of -0.0 and NaN.
864
;; Minimum, expressed as UNSPEC_IEEE_MIN rather than smin so that the
;; operand order is fixed.  Operand 1 must be a register.
865 (define_insn "*ieee_smin<mode>3"
866 [(set (match_operand:VF 0 "register_operand" "=x,x")
867 (unspec:VF
868 [(match_operand:VF 1 "register_operand" "0,x")
869 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
870 UNSPEC_IEEE_MIN))]
871 "TARGET_SSE"
872 "@
873 min<ssemodesuffix>\t{%2, %0|%0, %2}
874 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
875 [(set_attr "isa" "noavx,avx")
876 (set_attr "type" "sseadd")
877 (set_attr "prefix" "orig,vex")
878 (set_attr "mode" "<MODE>")])
879
;; Maximum, expressed as UNSPEC_IEEE_MAX rather than smax so that the
;; operand order is fixed.  Operand 1 must be a register.
880 (define_insn "*ieee_smax<mode>3"
881 [(set (match_operand:VF 0 "register_operand" "=x,x")
882 (unspec:VF
883 [(match_operand:VF 1 "register_operand" "0,x")
884 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
885 UNSPEC_IEEE_MAX))]
886 "TARGET_SSE"
887 "@
888 max<ssemodesuffix>\t{%2, %0|%0, %2}
889 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
890 [(set_attr "isa" "noavx,avx")
891 (set_attr "type" "sseadd")
892 (set_attr "prefix" "orig,vex")
893 (set_attr "mode" "V8SF")])
894
;; vaddsubpd on V4DF.  The vec_merge mask 10 (binary 1010) selects the
;; sum for elements 1 and 3 and the difference for elements 0 and 2.
895 (define_insn "avx_addsubv4df3"
896 [(set (match_operand:V4DF 0 "register_operand" "=x")
897 (vec_merge:V4DF
898 (plus:V4DF
899 (match_operand:V4DF 1 "register_operand" "x")
900 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
901 (minus:V4DF (match_dup 1) (match_dup 2))
902 (const_int 10)))]
903 "TARGET_AVX"
904 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
905 [(set_attr "type" "sseadd")
906 (set_attr "prefix" "vex")
907 (set_attr "mode" "V4DF")])
908
;; addsubpd on V2DF.  The vec_merge mask 2 (binary 10) selects the sum
;; for element 1 and the difference for element 0.  Alternative 0 is the
;; two-operand form, alternative 1 the three-operand AVX form.
909 (define_insn "sse3_addsubv2df3"
910 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
911 (vec_merge:V2DF
912 (plus:V2DF
913 (match_operand:V2DF 1 "register_operand" "0,x")
914 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
915 (minus:V2DF (match_dup 1) (match_dup 2))
916 (const_int 2)))]
917 "TARGET_SSE3"
918 "@
919 addsubpd\t{%2, %0|%0, %2}
920 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
921 [(set_attr "isa" "noavx,avx")
922 (set_attr "type" "sseadd")
923 (set_attr "atom_unit" "complex")
924 (set_attr "prefix" "orig,vex")
925 (set_attr "mode" "V2DF")])
926
;; vaddsubps on V8SF.  The vec_merge mask 170 (binary 10101010) selects
;; the sum for the odd-numbered elements and the difference for the
;; even-numbered ones.
927 (define_insn "avx_addsubv8sf3"
928 [(set (match_operand:V8SF 0 "register_operand" "=x")
929 (vec_merge:V8SF
930 (plus:V8SF
931 (match_operand:V8SF 1 "register_operand" "x")
932 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
933 (minus:V8SF (match_dup 1) (match_dup 2))
934 (const_int 170)))]
935 "TARGET_AVX"
936 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
937 [(set_attr "type" "sseadd")
938 (set_attr "prefix" "vex")
939 (set_attr "mode" "V8SF")])
940
;; addsubps on V4SF.  The vec_merge mask 10 (binary 1010) selects the
;; sum for elements 1 and 3 and the difference for elements 0 and 2.
;; prefix_rep is "1" for the non-AVX alternative and left at its default
;; for the AVX alternative.
941 (define_insn "sse3_addsubv4sf3"
942 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
943 (vec_merge:V4SF
944 (plus:V4SF
945 (match_operand:V4SF 1 "register_operand" "0,x")
946 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
947 (minus:V4SF (match_dup 1) (match_dup 2))
948 (const_int 10)))]
949 "TARGET_SSE3"
950 "@
951 addsubps\t{%2, %0|%0, %2}
952 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
953 [(set_attr "isa" "noavx,avx")
954 (set_attr "type" "sseadd")
955 (set_attr "prefix" "orig,vex")
956 (set_attr "prefix_rep" "1,*")
957 (set_attr "mode" "V4SF")])
958
;; 256-bit horizontal add/subtract of packed doubles (vhaddpd/vhsubpd).
;; The instruction works independently on each 128-bit lane, so the
;; result interleaves the two sources:
;;   element 0 = op1[0] <op> op1[1]
;;   element 1 = op2[0] <op> op2[1]
;;   element 2 = op1[2] <op> op1[3]
;;   element 3 = op2[2] <op> op2[3]
;; The RTL below must describe exactly this layout so that any pass
;; reasoning about the pattern sees what the hardware computes.
959 (define_insn "avx_h<plusminus_insn>v4df3"
960 [(set (match_operand:V4DF 0 "register_operand" "=x")
961 (vec_concat:V4DF
962 (vec_concat:V2DF
963 (plusminus:DF
964 (vec_select:DF
965 (match_operand:V4DF 1 "register_operand" "x")
966 (parallel [(const_int 0)]))
967 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
968 (plusminus:DF
969 (vec_select:DF
970 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
971 (parallel [(const_int 0)]))
972 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
973 (vec_concat:V2DF
974 (plusminus:DF
975 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
976 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
977 (plusminus:DF
978 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
979 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
980 "TARGET_AVX"
981 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
982 [(set_attr "type" "sseadd")
983 (set_attr "prefix" "vex")
984 (set_attr "mode" "V4DF")])
985
;; 128-bit horizontal add/subtract of packed doubles.  Result is
;;   element 0 = op1[0] <op> op1[1]
;;   element 1 = op2[0] <op> op2[1]
;; Alternative 0 is the legacy two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
986 (define_insn "sse3_h<plusminus_insn>v2df3"
987 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
988 (vec_concat:V2DF
989 (plusminus:DF
990 (vec_select:DF
991 (match_operand:V2DF 1 "register_operand" "0,x")
992 (parallel [(const_int 0)]))
993 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
994 (plusminus:DF
995 (vec_select:DF
996 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
997 (parallel [(const_int 0)]))
998 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
999 "TARGET_SSE3"
1000 "@
1001 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1002 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1003 [(set_attr "isa" "noavx,avx")
1004 (set_attr "type" "sseadd")
1005 (set_attr "prefix" "orig,vex")
1006 (set_attr "mode" "V2DF")])
1007
;; 256-bit horizontal add/subtract of packed singles (vhaddps/vhsubps).
;; The low V4SF half of the result is built from elements 0..3 of the
;; sources and the high half from elements 4..7:
;;   low  = { op1[0]<op>op1[1], op1[2]<op>op1[3],
;;            op2[0]<op>op2[1], op2[2]<op>op2[3] }
;;   high = { op1[4]<op>op1[5], op1[6]<op>op1[7],
;;            op2[4]<op>op2[5], op2[6]<op>op2[7] }
1008 (define_insn "avx_h<plusminus_insn>v8sf3"
1009 [(set (match_operand:V8SF 0 "register_operand" "=x")
1010 (vec_concat:V8SF
1011 (vec_concat:V4SF
1012 (vec_concat:V2SF
1013 (plusminus:SF
1014 (vec_select:SF
1015 (match_operand:V8SF 1 "register_operand" "x")
1016 (parallel [(const_int 0)]))
1017 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1018 (plusminus:SF
1019 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1020 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1021 (vec_concat:V2SF
1022 (plusminus:SF
1023 (vec_select:SF
1024 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1025 (parallel [(const_int 0)]))
1026 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1027 (plusminus:SF
1028 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1029 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1030 (vec_concat:V4SF
1031 (vec_concat:V2SF
1032 (plusminus:SF
1033 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1034 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1035 (plusminus:SF
1036 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1037 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1038 (vec_concat:V2SF
1039 (plusminus:SF
1040 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1041 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1042 (plusminus:SF
1043 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1044 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1045 "TARGET_AVX"
1046 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1047 [(set_attr "type" "sseadd")
1048 (set_attr "prefix" "vex")
1049 (set_attr "mode" "V8SF")])
1050
;; 128-bit horizontal add/subtract of packed singles.  Result is
;;   { op1[0]<op>op1[1], op1[2]<op>op1[3],
;;     op2[0]<op>op2[1], op2[2]<op>op2[3] }
;; Alternative 0 is the legacy two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
1051 (define_insn "sse3_h<plusminus_insn>v4sf3"
1052 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1053 (vec_concat:V4SF
1054 (vec_concat:V2SF
1055 (plusminus:SF
1056 (vec_select:SF
1057 (match_operand:V4SF 1 "register_operand" "0,x")
1058 (parallel [(const_int 0)]))
1059 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1060 (plusminus:SF
1061 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1062 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1063 (vec_concat:V2SF
1064 (plusminus:SF
1065 (vec_select:SF
1066 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1067 (parallel [(const_int 0)]))
1068 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1069 (plusminus:SF
1070 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1071 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1072 "TARGET_SSE3"
1073 "@
1074 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1075 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1076 [(set_attr "isa" "noavx,avx")
1077 (set_attr "type" "sseadd")
1078 (set_attr "atom_unit" "complex")
1079 (set_attr "prefix" "orig,vex")
1080 (set_attr "prefix_rep" "1,*")
1081 (set_attr "mode" "V4SF")])
1082
;; Expander for the V4DF plus-reduction standard name.  Emits three
;; insns: a horizontal add of operand 1 with itself into tmp, a
;; vperm2f128 of tmp with itself (immediate 1) into tmp2, and a vector
;; add of tmp and tmp2 into operand 0.  The RTL template is not used;
;; the C body ends with DONE.
1083 (define_expand "reduc_splus_v4df"
1084 [(match_operand:V4DF 0 "register_operand" "")
1085 (match_operand:V4DF 1 "register_operand" "")]
1086 "TARGET_AVX"
1087 {
1088 rtx tmp = gen_reg_rtx (V4DFmode);
1089 rtx tmp2 = gen_reg_rtx (V4DFmode);
1090 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1091 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1092 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
1093 DONE;
1094 })
1095
;; Expander for the V2DF plus-reduction standard name.  Emits a single
;; SSE3 horizontal add of operand 1 with itself into operand 0.
1096 (define_expand "reduc_splus_v2df"
1097 [(match_operand:V2DF 0 "register_operand" "")
1098 (match_operand:V2DF 1 "register_operand" "")]
1099 "TARGET_SSE3"
1100 {
1101 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1102 DONE;
1103 })
1104
;; Expander for the V8SF plus-reduction standard name.  Emits two
;; successive horizontal adds (operand 1 with itself into tmp, then tmp
;; with itself into tmp2), a vperm2f128 of tmp2 with itself (immediate
;; 1) that overwrites tmp, and a vector add of tmp and tmp2 into
;; operand 0.
1105 (define_expand "reduc_splus_v8sf"
1106 [(match_operand:V8SF 0 "register_operand" "")
1107 (match_operand:V8SF 1 "register_operand" "")]
1108 "TARGET_AVX"
1109 {
1110 rtx tmp = gen_reg_rtx (V8SFmode);
1111 rtx tmp2 = gen_reg_rtx (V8SFmode);
1112 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1113 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1114 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1115 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
1116 DONE;
1117 })
1118
;; Expander for the V4SF plus-reduction standard name.  Enabled for
;; plain SSE.  With SSE3 it emits two horizontal adds (operand 1 with
;; itself into tmp, then tmp with itself into operand 0); otherwise it
;; defers to ix86_expand_reduc_v4sf with gen_addv4sf3 as the combining
;; operation.
1119 (define_expand "reduc_splus_v4sf"
1120 [(match_operand:V4SF 0 "register_operand" "")
1121 (match_operand:V4SF 1 "register_operand" "")]
1122 "TARGET_SSE"
1123 {
1124 if (TARGET_SSE3)
1125 {
1126 rtx tmp = gen_reg_rtx (V4SFmode);
1127 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1128 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1129 }
1130 else
1131 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
1132 DONE;
1133 })
1134
1135
;; Expander for the V4SF signed-max reduction standard name.  All work
;; is delegated to ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the
;; combining operation.
1136 (define_expand "reduc_smax_v4sf"
1137 [(match_operand:V4SF 0 "register_operand" "")
1138 (match_operand:V4SF 1 "register_operand" "")]
1139 "TARGET_SSE"
1140 {
1141 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
1142 DONE;
1143 })
1144
;; Expander for the V4SF signed-min reduction standard name.  All work
;; is delegated to ix86_expand_reduc_v4sf with gen_sminv4sf3 as the
;; combining operation.
1145 (define_expand "reduc_smin_v4sf"
1146 [(match_operand:V4SF 0 "register_operand" "")
1147 (match_operand:V4SF 1 "register_operand" "")]
1148 "TARGET_SSE"
1149 {
1150 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1151 DONE;
1152 })
1153
1154 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1155 ;;
1156 ;; Parallel floating point comparisons
1157 ;;
1158 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1159
;; AVX packed compare with an explicit predicate immediate.  Modelled
;; as UNSPEC_PCMP over operand 1, operand 2 and the immediate operand
;; 3, which must satisfy const_0_to_31_operand.  Emitted as a
;; four-operand vcmp<suffix>; "length_immediate" accounts for the
;; one-byte immediate.
1160 (define_insn "avx_cmp<mode>3"
1161 [(set (match_operand:VF 0 "register_operand" "=x")
1162 (unspec:VF
1163 [(match_operand:VF 1 "register_operand" "x")
1164 (match_operand:VF 2 "nonimmediate_operand" "xm")
1165 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1166 UNSPEC_PCMP))]
1167 "TARGET_AVX"
1168 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1169 [(set_attr "type" "ssecmp")
1170 (set_attr "length_immediate" "1")
1171 (set_attr "prefix" "vex")
1172 (set_attr "mode" "<MODE>")])
1173
;; Scalar ("vm") form of the AVX compare with predicate immediate.
;; The vec_merge with mask 1 takes element 0 from the UNSPEC_PCMP
;; result and every other element from operand 1.  Uses the scalar
;; mnemonic suffix and the scalar mode attribute.
1174 (define_insn "avx_vmcmp<mode>3"
1175 [(set (match_operand:VF_128 0 "register_operand" "=x")
1176 (vec_merge:VF_128
1177 (unspec:VF_128
1178 [(match_operand:VF_128 1 "register_operand" "x")
1179 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1180 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1181 UNSPEC_PCMP)
1182 (match_dup 1)
1183 (const_int 1)))]
1184 "TARGET_AVX"
1185 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1186 [(set_attr "type" "ssecmp")
1187 (set_attr "length_immediate" "1")
1188 (set_attr "prefix" "vex")
1189 (set_attr "mode" "<ssescalarmode>")])
1190
;; Packed compare where the comparison is an RTL operator (operand 3,
;; accepted by sse_comparison_operator) applied to operands 1 and 2.
;; Alternative 0 is the legacy two-operand form (operand 1 tied to the
;; destination); alternative 1 is the VEX three-operand form.
;; NOTE(review): %D3 presumably prints the condition-name part of the
;; mnemonic from operator 3 -- confirm against the i386 operand printer.
1191 (define_insn "<sse>_maskcmp<mode>3"
1192 [(set (match_operand:VF 0 "register_operand" "=x,x")
1193 (match_operator:VF 3 "sse_comparison_operator"
1194 [(match_operand:VF 1 "register_operand" "0,x")
1195 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1196 "TARGET_SSE"
1197 "@
1198 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1199 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1200 [(set_attr "isa" "noavx,avx")
1201 (set_attr "type" "ssecmp")
1202 (set_attr "length_immediate" "1")
1203 (set_attr "prefix" "orig,vex")
1204 (set_attr "mode" "<MODE>")])
1205
;; Scalar ("vm") form of the operator-based compare.  The vec_merge
;; with mask 1 takes element 0 from the comparison of operands 1 and 2
;; and every other element from operand 1.  Alternative 0 is the legacy
;; two-operand form, alternative 1 the VEX form.  "length_immediate"
;; is given explicitly only for the legacy alternative.
1206 (define_insn "<sse>_vmmaskcmp<mode>3"
1207 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1208 (vec_merge:VF_128
1209 (match_operator:VF_128 3 "sse_comparison_operator"
1210 [(match_operand:VF_128 1 "register_operand" "0,x")
1211 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1212 (match_dup 1)
1213 (const_int 1)))]
1214 "TARGET_SSE"
1215 "@
1216 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1217 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1218 [(set_attr "isa" "noavx,avx")
1219 (set_attr "type" "ssecmp")
1220 (set_attr "length_immediate" "1,*")
1221 (set_attr "prefix" "orig,vex")
1222 (set_attr "mode" "<ssescalarmode>")])
1223
;; Scalar compare that sets the flags register in CCFPmode.  Compares
;; element 0 of vector operand 0 with element 0 of vector operand 1
;; (operand 1 may be in memory).  The %v template prefix together with
;; prefix "maybe_vex" lets one template serve both the legacy and the
;; VEX encoding.  "prefix_data16" is 1 when the insn mode is DF and 0
;; otherwise.
1224 (define_insn "<sse>_comi"
1225 [(set (reg:CCFP FLAGS_REG)
1226 (compare:CCFP
1227 (vec_select:MODEF
1228 (match_operand:<ssevecmode> 0 "register_operand" "x")
1229 (parallel [(const_int 0)]))
1230 (vec_select:MODEF
1231 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1232 (parallel [(const_int 0)]))))]
1233 "SSE_FLOAT_MODE_P (<MODE>mode)"
1234 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1235 [(set_attr "type" "ssecomi")
1236 (set_attr "prefix" "maybe_vex")
1237 (set_attr "prefix_rep" "0")
1238 (set (attr "prefix_data16")
1239 (if_then_else (eq_attr "mode" "DF")
1240 (const_string "1")
1241 (const_string "0")))
1242 (set_attr "mode" "<MODE>")])
1243
;; Same shape as the comi pattern, but the flags register is set in
;; CCFPUmode and the emitted mnemonic is ucomi.  Compares element 0 of
;; vector operand 0 with element 0 of vector operand 1 (register or
;; memory).  "prefix_data16" is 1 when the insn mode is DF and 0
;; otherwise.
1244 (define_insn "<sse>_ucomi"
1245 [(set (reg:CCFPU FLAGS_REG)
1246 (compare:CCFPU
1247 (vec_select:MODEF
1248 (match_operand:<ssevecmode> 0 "register_operand" "x")
1249 (parallel [(const_int 0)]))
1250 (vec_select:MODEF
1251 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1252 (parallel [(const_int 0)]))))]
1253 "SSE_FLOAT_MODE_P (<MODE>mode)"
1254 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1255 [(set_attr "type" "ssecomi")
1256 (set_attr "prefix" "maybe_vex")
1257 (set_attr "prefix_rep" "0")
1258 (set (attr "prefix_data16")
1259 (if_then_else (eq_attr "mode" "DF")
1260 (const_string "1")
1261 (const_string "0")))
1262 (set_attr "mode" "<MODE>")])
1263
;; Vector conditional-select expander for float vector modes.
;; Operand 3 is the comparison operator applied to operands 4 and 5;
;; operands 1 and 2 are the two arms of the if_then_else.  The whole
;; expansion is delegated to ix86_expand_fp_vcond, whose success is
;; asserted, and the RTL template itself is never emitted (DONE).
1264 (define_expand "vcond<mode>"
1265 [(set (match_operand:VF 0 "register_operand" "")
1266 (if_then_else:VF
1267 (match_operator 3 ""
1268 [(match_operand:VF 4 "nonimmediate_operand" "")
1269 (match_operand:VF 5 "nonimmediate_operand" "")])
1270 (match_operand:VF 1 "general_operand" "")
1271 (match_operand:VF 2 "general_operand" "")))]
1272 "TARGET_SSE"
1273 {
1274 bool ok = ix86_expand_fp_vcond (operands);
1275 gcc_assert (ok);
1276 DONE;
1277 })
1278
1279 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1280 ;;
1281 ;; Parallel floating point logical operations
1282 ;;
1283 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1284
;; Vector and-not: op0 = (~op1) & op2.  The output is produced by C
;; code that formats the mnemonic into a static buffer: the suffix is
;; "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, otherwise
;; the mode's own suffix.  Alternative 0 yields the legacy two-operand
;; "andn" form, alternative 1 the three-operand "vandn" form.
;; With a two-character suffix the longest formatted string is 31
;; characters, which exactly fills buf[32] including the terminator;
;; the buffer must grow if the template is ever lengthened.
1285 (define_insn "<sse>_andnot<mode>3"
1286 [(set (match_operand:VF 0 "register_operand" "=x,x")
1287 (and:VF
1288 (not:VF
1289 (match_operand:VF 1 "register_operand" "0,x"))
1290 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1291 "TARGET_SSE"
1292 {
1293 static char buf[32];
1294 const char *insn;
1295 const char *suffix
1296 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1297
1298 switch (which_alternative)
1299 {
1300 case 0:
1301 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1302 break;
1303 case 1:
1304 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1305 break;
1306 default:
1307 gcc_unreachable ();
1308 }
1309
1310 snprintf (buf, sizeof (buf), insn, suffix);
1311 return buf;
1312 }
1313 [(set_attr "isa" "noavx,avx")
1314 (set_attr "type" "sselog")
1315 (set_attr "prefix" "orig,vex")
1316 (set_attr "mode" "<MODE>")])
1317
;; Expander for the bitwise logic operations covered by the any_logic
;; iterator on float vector modes.  Both inputs may be in memory here;
;; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy on the operands before the
;; template is emitted.
1318 (define_expand "<code><mode>3"
1319 [(set (match_operand:VF 0 "register_operand" "")
1320 (any_logic:VF
1321 (match_operand:VF 1 "nonimmediate_operand" "")
1322 (match_operand:VF 2 "nonimmediate_operand" "")))]
1323 "TARGET_SSE"
1324 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1325
;; Insn for the any_logic operations on float vector modes.  The "%"
;; constraint modifier marks operands 1 and 2 as commutative, and the
;; insn condition additionally requires ix86_binary_operator_ok.
;; The output C code formats "<logic>" (alternative 0, two-operand) or
;; "v<logic>" (alternative 1, three-operand) into a static buffer,
;; using suffix "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set
;; and the mode's own suffix otherwise.
1326 (define_insn "*<code><mode>3"
1327 [(set (match_operand:VF 0 "register_operand" "=x,x")
1328 (any_logic:VF
1329 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1330 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1331 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1332 {
1333 static char buf[32];
1334 const char *insn;
1335 const char *suffix
1336 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1337
1338 switch (which_alternative)
1339 {
1340 case 0:
1341 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1342 break;
1343 case 1:
1344 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1345 break;
1346 default:
1347 gcc_unreachable ();
1348 }
1349
1350 snprintf (buf, sizeof (buf), insn, suffix);
1351 return buf;
1352 }
1353 [(set_attr "isa" "noavx,avx")
1354 (set_attr "type" "sselog")
1355 (set_attr "prefix" "orig,vex")
1356 (set_attr "mode" "<MODE>")])
1357
;; Vector copysign expander.  The preparation code builds a mask with
;; ix86_build_signbit_mask (operand 3) and allocates two temporaries
;; (operands 4 and 5).  Three sets are then emitted:
;;   op4 = ~mask & op1
;;   op5 =  mask & op2
;;   op0 =  op4  | op5
1358 (define_expand "copysign<mode>3"
1359 [(set (match_dup 4)
1360 (and:VF
1361 (not:VF (match_dup 3))
1362 (match_operand:VF 1 "nonimmediate_operand" "")))
1363 (set (match_dup 5)
1364 (and:VF (match_dup 3)
1365 (match_operand:VF 2 "nonimmediate_operand" "")))
1366 (set (match_operand:VF 0 "register_operand" "")
1367 (ior:VF (match_dup 4) (match_dup 5)))]
1368 "TARGET_SSE"
1369 {
1370 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1371
1372 operands[4] = gen_reg_rtx (<MODE>mode);
1373 operands[5] = gen_reg_rtx (<MODE>mode);
1374 })
1375
1376 ;; Also define scalar versions. These are used for abs, neg, and
1377 ;; conditional move. Using subregs into vector modes causes register
1378 ;; allocation lossage. These patterns do not allow memory operands
1379 ;; because the native instructions read the full 128-bits.
1380
;; Scalar-mode (MODEF) and-not: op0 = (~op1) & op2, register operands
;; only.  The emitted instruction is still the packed one: the suffix
;; is "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, otherwise
;; the suffix of the corresponding vector mode, and the "mode"
;; attribute is the vector mode.
1381 (define_insn "*andnot<mode>3"
1382 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1383 (and:MODEF
1384 (not:MODEF
1385 (match_operand:MODEF 1 "register_operand" "0,x"))
1386 (match_operand:MODEF 2 "register_operand" "x,x")))]
1387 "SSE_FLOAT_MODE_P (<MODE>mode)"
1388 {
1389 static char buf[32];
1390 const char *insn;
1391 const char *suffix
1392 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1393
1394 switch (which_alternative)
1395 {
1396 case 0:
1397 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1398 break;
1399 case 1:
1400 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1401 break;
1402 default:
1403 gcc_unreachable ();
1404 }
1405
1406 snprintf (buf, sizeof (buf), insn, suffix);
1407 return buf;
1408 }
1409 [(set_attr "isa" "noavx,avx")
1410 (set_attr "type" "sselog")
1411 (set_attr "prefix" "orig,vex")
1412 (set_attr "mode" "<ssevecmode>")])
1413
;; Scalar-mode (MODEF) version of the any_logic operations, register
;; operands only, with operands 1 and 2 marked commutative by "%".
;; As with the scalar and-not pattern, the packed instruction is
;; emitted: suffix "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is
;; set, otherwise the suffix of the corresponding vector mode.
1414 (define_insn "*<code><mode>3"
1415 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1416 (any_logic:MODEF
1417 (match_operand:MODEF 1 "register_operand" "%0,x")
1418 (match_operand:MODEF 2 "register_operand" "x,x")))]
1419 "SSE_FLOAT_MODE_P (<MODE>mode)"
1420 {
1421 static char buf[32];
1422 const char *insn;
1423 const char *suffix
1424 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1425
1426 switch (which_alternative)
1427 {
1428 case 0:
1429 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1430 break;
1431 case 1:
1432 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1433 break;
1434 default:
1435 gcc_unreachable ();
1436 }
1437
1438 snprintf (buf, sizeof (buf), insn, suffix);
1439 return buf;
1440 }
1441 [(set_attr "isa" "noavx,avx")
1442 (set_attr "type" "sselog")
1443 (set_attr "prefix" "orig,vex")
1444 (set_attr "mode" "<ssevecmode>")])
1445
1446 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1447 ;;
1448 ;; FMA4 floating point multiply/accumulate instructions. This
1449 ;; includes the scalar version of the instructions as well as the
1450 ;; vector.
1451 ;;
1452 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1453
1454 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1455 ;; combine to generate a multiply/add with two memory references. We then
1456 ;; split this insn, into loading up the destination register with one of the
1457 ;; memory operations. If we don't manage to split the insn, reload will
1458 ;; generate the appropriate moves. The reason this is needed, is that combine
1459 ;; has already folded one of the memory references into both the multiply and
1460 ;; add insns, and it can't generate a new pseudo. I.e.:
1461 ;; (set (reg1) (mem (addr1)))
1462 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1463 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1464 ;;
1465 ;; ??? This is historic, pre-dating the gimple fma transformation.
1466 ;; We could now properly represent that only one memory operand is
1467 ;; allowed and not be penalized during optimization.
1468
1469 ;; Intrinsic FMA operations.
1470
1471 ;; The standard names for fma are only available with SSE math enabled.
;; Standard-name expander: op0 = fma (op1, op2, op3).  Enabled when
;; either FMA or FMA4 is available and SSE math is in use.  There is no
;; preparation code; the template is emitted as written.
1472 (define_expand "fma<mode>4"
1473 [(set (match_operand:FMAMODE 0 "register_operand")
1474 (fma:FMAMODE
1475 (match_operand:FMAMODE 1 "nonimmediate_operand")
1476 (match_operand:FMAMODE 2 "nonimmediate_operand")
1477 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1478 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1479
;; Standard-name expander: op0 = fma (op1, op2, -op3).  Same enabling
;; condition as the plain fma expander.
1480 (define_expand "fms<mode>4"
1481 [(set (match_operand:FMAMODE 0 "register_operand")
1482 (fma:FMAMODE
1483 (match_operand:FMAMODE 1 "nonimmediate_operand")
1484 (match_operand:FMAMODE 2 "nonimmediate_operand")
1485 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1486 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1487
;; Standard-name expander: op0 = fma (-op1, op2, op3).  Same enabling
;; condition as the plain fma expander.
1488 (define_expand "fnma<mode>4"
1489 [(set (match_operand:FMAMODE 0 "register_operand")
1490 (fma:FMAMODE
1491 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1492 (match_operand:FMAMODE 2 "nonimmediate_operand")
1493 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1494 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1495
;; Standard-name expander: op0 = fma (-op1, op2, -op3).  Same enabling
;; condition as the plain fma expander.
1496 (define_expand "fnms<mode>4"
1497 [(set (match_operand:FMAMODE 0 "register_operand")
1498 (fma:FMAMODE
1499 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1500 (match_operand:FMAMODE 2 "nonimmediate_operand")
1501 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1502 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1503
1504 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; Named expander for the intrinsic multiply-add:
;; op0 = fma (op1, op2, op3).  Unlike the standard-name expanders, the
;; condition does not require TARGET_SSE_MATH.
1505 (define_expand "fma4i_fmadd_<mode>"
1506 [(set (match_operand:FMAMODE 0 "register_operand")
1507 (fma:FMAMODE
1508 (match_operand:FMAMODE 1 "nonimmediate_operand")
1509 (match_operand:FMAMODE 2 "nonimmediate_operand")
1510 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1511 "TARGET_FMA || TARGET_FMA4")
1512
;; FMA4 four-operand multiply-add: op0 = op1 * op2 + op3.
;; The two constraint alternatives allow a memory operand in operand 3
;; (alternative 0) or in operand 2 (alternative 1), never in both.
;; Operands 1 and 2 are marked commutative with "%".
1513 (define_insn "*fma4i_fmadd_<mode>"
1514 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1515 (fma:FMAMODE
1516 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1517 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1518 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1519 "TARGET_FMA4"
1520 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1521 [(set_attr "type" "ssemuladd")
1522 (set_attr "mode" "<MODE>")])
1523
;; FMA4 four-operand multiply-subtract: op0 = op1 * op2 - op3,
;; expressed as fma with a negated third operand.  Memory is allowed in
;; operand 3 (alternative 0) or operand 2 (alternative 1).
1524 (define_insn "*fma4i_fmsub_<mode>"
1525 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1526 (fma:FMAMODE
1527 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1528 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1529 (neg:FMAMODE
1530 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1531 "TARGET_FMA4"
1532 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1533 [(set_attr "type" "ssemuladd")
1534 (set_attr "mode" "<MODE>")])
1535
;; FMA4 negated multiply-add: op0 = -(op1) * op2 + op3, expressed as
;; fma with a negated first operand.  Memory is allowed in operand 3
;; (alternative 0) or operand 2 (alternative 1).
1536 (define_insn "*fma4i_fnmadd_<mode>"
1537 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1538 (fma:FMAMODE
1539 (neg:FMAMODE
1540 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1541 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1542 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1543 "TARGET_FMA4"
1544 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1545 [(set_attr "type" "ssemuladd")
1546 (set_attr "mode" "<MODE>")])
1547
;; FMA4 negated multiply-subtract: op0 = -(op1) * op2 - op3, expressed
;; as fma with the first and third operands negated.  Memory is allowed
;; in operand 3 (alternative 0) or operand 2 (alternative 1).
1548 (define_insn "*fma4i_fnmsub_<mode>"
1549 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1550 (fma:FMAMODE
1551 (neg:FMAMODE
1552 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1553 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1554 (neg:FMAMODE
1555 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1556 "TARGET_FMA4"
1557 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1558 [(set_attr "type" "ssemuladd")
1559 (set_attr "mode" "<MODE>")])
1560
1561 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1562 ;; entire destination register, with the high-order elements zeroed.
1563
;; Expander for the scalar FMA4 multiply-add intrinsic.  The vec_merge
;; with mask 1 takes element 0 from fma (op1, op2, op3) and all other
;; elements from operand 4, which the preparation code sets to the
;; zero vector of the mode.
1564 (define_expand "fma4i_vmfmadd_<mode>"
1565 [(set (match_operand:VF_128 0 "register_operand")
1566 (vec_merge:VF_128
1567 (fma:VF_128
1568 (match_operand:VF_128 1 "nonimmediate_operand")
1569 (match_operand:VF_128 2 "nonimmediate_operand")
1570 (match_operand:VF_128 3 "nonimmediate_operand"))
1571 (match_dup 4)
1572 (const_int 1)))]
1573 "TARGET_FMA4"
1574 {
1575 operands[4] = CONST0_RTX (<MODE>mode);
1576 })
1577
;; Scalar FMA4 multiply-add.  Element 0 is op1 * op2 + op3; the other
;; elements come from operand 4, which must be a zero constant
;; (const0_operand).  Memory is allowed in operand 3 (alternative 0)
;; or operand 2 (alternative 1).
1578 (define_insn "*fma4i_vmfmadd_<mode>"
1579 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1580 (vec_merge:VF_128
1581 (fma:VF_128
1582 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1583 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1584 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1585 (match_operand:VF_128 4 "const0_operand" "")
1586 (const_int 1)))]
1587 "TARGET_FMA4"
1588 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1589 [(set_attr "type" "ssemuladd")
1590 (set_attr "mode" "<MODE>")])
1591
;; Scalar FMA4 multiply-subtract.  Element 0 is op1 * op2 - op3; the
;; other elements come from the zero constant in operand 4.
1592 (define_insn "*fma4i_vmfmsub_<mode>"
1593 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1594 (vec_merge:VF_128
1595 (fma:VF_128
1596 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1597 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1598 (neg:VF_128
1599 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1600 (match_operand:VF_128 4 "const0_operand" "")
1601 (const_int 1)))]
1602 "TARGET_FMA4"
1603 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1604 [(set_attr "type" "ssemuladd")
1605 (set_attr "mode" "<MODE>")])
1606
;; Scalar FMA4 negated multiply-add.  Element 0 is -(op1) * op2 + op3;
;; the other elements come from the zero constant in operand 4.
1607 (define_insn "*fma4i_vmfnmadd_<mode>"
1608 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1609 (vec_merge:VF_128
1610 (fma:VF_128
1611 (neg:VF_128
1612 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1613 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1614 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1615 (match_operand:VF_128 4 "const0_operand" "")
1616 (const_int 1)))]
1617 "TARGET_FMA4"
1618 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1619 [(set_attr "type" "ssemuladd")
1620 (set_attr "mode" "<MODE>")])
1621
;; Scalar FMA4 negated multiply-subtract.  Element 0 is
;; -(op1) * op2 - op3; the other elements come from the zero constant
;; in operand 4.
1622 (define_insn "*fma4i_vmfnmsub_<mode>"
1623 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1624 (vec_merge:VF_128
1625 (fma:VF_128
1626 (neg:VF_128
1627 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1628 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1629 (neg:VF_128
1630 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1631 (match_operand:VF_128 4 "const0_operand" "")
1632 (const_int 1)))]
1633 "TARGET_FMA4"
1634 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1635 [(set_attr "type" "ssemuladd")
1636 (set_attr "mode" "<MODE>")])
1637
1638 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1639 ;;
1640 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1641 ;;
1642 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1643
1644 ;; It would be possible to represent these without the UNSPEC as
1645 ;;
1646 ;; (vec_merge
1647 ;; (fma op1 op2 op3)
1648 ;; (fma op1 op2 (neg op3))
1649 ;; (merge-const))
1650 ;;
1651 ;; But this doesn't seem useful in practice.
1652
;; Named expander for the multiply with alternating add/subtract,
;; represented opaquely as UNSPEC_FMADDSUB over operands 1, 2 and 3.
;; Enabled when either FMA or FMA4 is available; no preparation code.
1653 (define_expand "fmaddsub_<mode>"
1654 [(set (match_operand:VF 0 "register_operand")
1655 (unspec:VF
1656 [(match_operand:VF 1 "nonimmediate_operand")
1657 (match_operand:VF 2 "nonimmediate_operand")
1658 (match_operand:VF 3 "nonimmediate_operand")]
1659 UNSPEC_FMADDSUB))]
1660 "TARGET_FMA || TARGET_FMA4")
1661
;; FMA4 four-operand multiply with alternating add/subtract
;; (UNSPEC_FMADDSUB of operands 1, 2 and 3).  The pattern is
;; instantiated for every mode in the VF iterator, so the mnemonic
;; suffix must follow the mode (<ssemodesuffix>) rather than being
;; fixed to "ps"; otherwise the double-precision modes would emit the
;; single-precision instruction.  Memory is allowed in operand 3
;; (alternative 0) or operand 2 (alternative 1).
1662 (define_insn "*fma4_fmaddsub_<mode>"
1663 [(set (match_operand:VF 0 "register_operand" "=x,x")
1664 (unspec:VF
1665 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1666 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1667 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
1668 UNSPEC_FMADDSUB))]
1669 "TARGET_FMA4"
1670 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1671 [(set_attr "type" "ssemuladd")
1672 (set_attr "mode" "<MODE>")])
1673
;; FMA4 four-operand multiply with alternating subtract/add, written
;; as UNSPEC_FMADDSUB with a negated third operand.  The pattern is
;; instantiated for every mode in the VF iterator, so the mnemonic
;; suffix must follow the mode (<ssemodesuffix>) rather than being
;; fixed to "ps".  Memory is allowed in operand 3 (alternative 0) or
;; operand 2 (alternative 1).
1674 (define_insn "*fma4_fmsubadd_<mode>"
1675 [(set (match_operand:VF 0 "register_operand" "=x,x")
1676 (unspec:VF
1677 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1678 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1679 (neg:VF
1680 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
1681 UNSPEC_FMADDSUB))]
1682 "TARGET_FMA4"
1683 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1684 [(set_attr "type" "ssemuladd")
1685 (set_attr "mode" "<MODE>")])
1686
1687 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1688 ;;
1689 ;; FMA3 floating point multiply/accumulate instructions.
1690 ;;
1691 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1692
;; FMA3 multiply-add: op0 = op1 * op2 + op3.  The destination doubles
;; as one source, so each alternative picks the instruction form whose
;; digit order matches which operand is tied to operand 0:
;;   alt 0: op1 tied, op2 reg/mem, op3 reg  -> 132 form
;;   alt 1: op1 tied, op2 reg, op3 reg/mem  -> 213 form
;;   alt 2: op3 tied, op1 reg, op2 reg/mem  -> 231 form
;; Only the 132, 213 and 231 mnemonics are defined for FMA3.
1693 (define_insn "*fma_fmadd_<mode>"
1694 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1695 (fma:FMAMODE
1696 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1697 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1698 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1699 "TARGET_FMA"
1700 "@
1701 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1702 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1703 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1704 [(set_attr "type" "ssemuladd")
1705 (set_attr "mode" "<MODE>")])
1706
;; FMA3 multiply-subtract: op0 = op1 * op2 - op3 (fma with negated
;; third operand).  Alternatives select the form by which operand is
;; tied to the destination:
;;   alt 0: op1 tied, op2 reg/mem, op3 reg  -> 132 form
;;   alt 1: op1 tied, op2 reg, op3 reg/mem  -> 213 form
;;   alt 2: op3 tied, op1 reg, op2 reg/mem  -> 231 form
;; Only the 132, 213 and 231 mnemonics are defined for FMA3.
1707 (define_insn "*fma_fmsub_<mode>"
1708 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1709 (fma:FMAMODE
1710 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1711 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1712 (neg:FMAMODE
1713 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1714 "TARGET_FMA"
1715 "@
1716 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1717 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1718 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1719 [(set_attr "type" "ssemuladd")
1720 (set_attr "mode" "<MODE>")])
1721
;; FMA3 negated multiply-add: op0 = -(op1) * op2 + op3 (fma with
;; negated first operand).  Named after the instruction it emits so
;; that it is distinguishable from the plain multiply-add pattern in
;; dumps.  Alternatives select the form by which operand is tied to
;; the destination:
;;   alt 0: op1 tied, op2 reg/mem, op3 reg  -> 132 form
;;   alt 1: op1 tied, op2 reg, op3 reg/mem  -> 213 form
;;   alt 2: op3 tied, op1 reg, op2 reg/mem  -> 231 form
;; Only the 132, 213 and 231 mnemonics are defined for FMA3.
1722 (define_insn "*fma_fnmadd_<mode>"
1723 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1724 (fma:FMAMODE
1725 (neg:FMAMODE
1726 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1727 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1728 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1729 "TARGET_FMA"
1730 "@
1731 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1732 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1733 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1734 [(set_attr "type" "ssemuladd")
1735 (set_attr "mode" "<MODE>")])
1736
;; FMA3 negated multiply-subtract: op0 = -(op1) * op2 - op3 (fma with
;; first and third operands negated).  Named after the instruction it
;; emits so that it is distinguishable from the plain
;; multiply-subtract pattern in dumps.  Alternatives select the form
;; by which operand is tied to the destination:
;;   alt 0: op1 tied, op2 reg/mem, op3 reg  -> 132 form
;;   alt 1: op1 tied, op2 reg, op3 reg/mem  -> 213 form
;;   alt 2: op3 tied, op1 reg, op2 reg/mem  -> 231 form
;; Only the 132, 213 and 231 mnemonics are defined for FMA3.
1737 (define_insn "*fma_fnmsub_<mode>"
1738 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1739 (fma:FMAMODE
1740 (neg:FMAMODE
1741 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1742 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1743 (neg:FMAMODE
1744 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1745 "TARGET_FMA"
1746 "@
1747 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1748 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1749 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1750 [(set_attr "type" "ssemuladd")
1751 (set_attr "mode" "<MODE>")])
1752
;; FMA3 multiply with alternating add/subtract, represented as
;; UNSPEC_FMADDSUB of operands 1, 2 and 3.  Alternatives select the
;; instruction form by which operand is tied to the destination:
;;   alt 0: op1 tied, op2 reg/mem, op3 reg  -> 132 form
;;   alt 1: op1 tied, op2 reg, op3 reg/mem  -> 213 form
;;   alt 2: op3 tied, op1 reg, op2 reg/mem  -> 231 form
1753 (define_insn "*fma_fmaddsub_<mode>"
1754 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
1755 (unspec:VF
1756 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
1757 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
1758 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
1759 UNSPEC_FMADDSUB))]
1760 "TARGET_FMA"
1761 "@
1762 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1763 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1764 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1765 [(set_attr "type" "ssemuladd")
1766 (set_attr "mode" "<MODE>")])
1767
;; FMA3 multiply with alternating subtract/add, written as
;; UNSPEC_FMADDSUB with a negated third operand.  Alternatives select
;; the instruction form by which operand is tied to the destination:
;;   alt 0: op1 tied, op2 reg/mem, op3 reg  -> 132 form
;;   alt 1: op1 tied, op2 reg, op3 reg/mem  -> 213 form
;;   alt 2: op3 tied, op1 reg, op2 reg/mem  -> 231 form
1768 (define_insn "*fma_fmsubadd_<mode>"
1769 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
1770 (unspec:VF
1771 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
1772 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
1773 (neg:VF
1774 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
1775 UNSPEC_FMADDSUB))]
1776 "TARGET_FMA"
1777 "@
1778 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1779 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1780 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1781 [(set_attr "type" "ssemuladd")
1782 (set_attr "mode" "<MODE>")])
1783
1784 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1785 ;;
1786 ;; Parallel single-precision floating point conversion operations
1787 ;;
1788 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1789
;; Convert two packed SImode integers (operand 2, constraint "ym") to
;; single precision and merge them into the low part of a V4SF.
;; Mask 3 takes elements 0 and 1 from the converted value and elements
;; 2 and 3 from operand 1, which is tied to the destination.
1790 (define_insn "sse_cvtpi2ps"
1791 [(set (match_operand:V4SF 0 "register_operand" "=x")
1792 (vec_merge:V4SF
1793 (vec_duplicate:V4SF
1794 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1795 (match_operand:V4SF 1 "register_operand" "0")
1796 (const_int 3)))]
1797 "TARGET_SSE"
1798 "cvtpi2ps\t{%2, %0|%0, %2}"
1799 [(set_attr "type" "ssecvt")
1800 (set_attr "mode" "V4SF")])
1801
;; Convert the low two single-precision elements of operand 1 to
;; packed SImode integers in a "y"-class register.  The conversion is
;; modelled as UNSPEC_FIX_NOTRUNC on the whole V4SF, from which
;; elements 0 and 1 are selected.
1802 (define_insn "sse_cvtps2pi"
1803 [(set (match_operand:V2SI 0 "register_operand" "=y")
1804 (vec_select:V2SI
1805 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1806 UNSPEC_FIX_NOTRUNC)
1807 (parallel [(const_int 0) (const_int 1)])))]
1808 "TARGET_SSE"
1809 "cvtps2pi\t{%1, %0|%0, %1}"
1810 [(set_attr "type" "ssecvt")
1811 (set_attr "unit" "mmx")
1812 (set_attr "mode" "DI")])
1813
;; Same shape as sse_cvtps2pi, but the conversion is expressed with
;; the RTL "fix" operation rather than UNSPEC_FIX_NOTRUNC, and the
;; emitted mnemonic is cvttps2pi.  Elements 0 and 1 of the converted
;; V4SI are selected into a "y"-class register.
1814 (define_insn "sse_cvttps2pi"
1815 [(set (match_operand:V2SI 0 "register_operand" "=y")
1816 (vec_select:V2SI
1817 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1818 (parallel [(const_int 0) (const_int 1)])))]
1819 "TARGET_SSE"
1820 "cvttps2pi\t{%1, %0|%0, %1}"
1821 [(set_attr "type" "ssecvt")
1822 (set_attr "unit" "mmx")
1823 (set_attr "prefix_rep" "0")
1824 (set_attr "mode" "SF")])
1825
;; Convert an SImode integer (operand 2) to single precision and place
;; it in element 0 of the result; mask 1 takes the remaining elements
;; from operand 1.  Three alternatives: legacy with a register source,
;; legacy with a memory source (both with operand 1 tied to the
;; destination), and the three-operand VEX form.  The two legacy
;; alternatives are kept separate so that the decode attributes can
;; differ between the register and memory source.
1826 (define_insn "sse_cvtsi2ss"
1827 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
1828 (vec_merge:V4SF
1829 (vec_duplicate:V4SF
1830 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
1831 (match_operand:V4SF 1 "register_operand" "0,0,x")
1832 (const_int 1)))]
1833 "TARGET_SSE"
1834 "@
1835 cvtsi2ss\t{%2, %0|%0, %2}
1836 cvtsi2ss\t{%2, %0|%0, %2}
1837 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
1838 [(set_attr "isa" "noavx,noavx,avx")
1839 (set_attr "type" "sseicvt")
1840 (set_attr "athlon_decode" "vector,double,*")
1841 (set_attr "amdfam10_decode" "vector,double,*")
1842 (set_attr "bdver1_decode" "double,direct,*")
1843 (set_attr "prefix" "orig,orig,vex")
1844 (set_attr "mode" "SF")])
1845
;; DImode counterpart of sse_cvtsi2ss, enabled only with TARGET_64BIT:
;; convert DImode operand 2 to SFmode and insert it into element 0 of
;; operand 1.  The two noavx alternatives set prefix_rex to 1; the avx
;; alternative sets length_vex to 4 instead.
1846 (define_insn "sse_cvtsi2ssq"
1847 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
1848 (vec_merge:V4SF
1849 (vec_duplicate:V4SF
1850 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
1851 (match_operand:V4SF 1 "register_operand" "0,0,x")
1852 (const_int 1)))]
1853 "TARGET_SSE && TARGET_64BIT"
1854 "@
1855 cvtsi2ssq\t{%2, %0|%0, %2}
1856 cvtsi2ssq\t{%2, %0|%0, %2}
1857 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
1858 [(set_attr "isa" "noavx,noavx,avx")
1859 (set_attr "type" "sseicvt")
1860 (set_attr "athlon_decode" "vector,double,*")
1861 (set_attr "amdfam10_decode" "vector,double,*")
1862 (set_attr "bdver1_decode" "double,direct,*")
1863 (set_attr "length_vex" "*,*,4")
1864 (set_attr "prefix_rex" "1,1,*")
1865 (set_attr "prefix" "orig,orig,vex")
1866 (set_attr "mode" "SF")])
1867
;; cvtss2si: convert element 0 of V4SF operand 1 (register or memory) to an
;; SImode general register through UNSPEC_FIX_NOTRUNC.  The %v template
;; prefix goes together with prefix "maybe_vex".
;; NOTE(review): sse_cvtss2si_2 emits the same mnemonic and also sets
;; amdfam10_decode; no amdfam10_decode is given here -- confirm whether the
;; omission is intentional.
1868 (define_insn "sse_cvtss2si"
1869 [(set (match_operand:SI 0 "register_operand" "=r,r")
1870 (unspec:SI
1871 [(vec_select:SF
1872 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1873 (parallel [(const_int 0)]))]
1874 UNSPEC_FIX_NOTRUNC))]
1875 "TARGET_SSE"
1876 "%vcvtss2si\t{%1, %0|%0, %1}"
1877 [(set_attr "type" "sseicvt")
1878 (set_attr "athlon_decode" "double,vector")
1879 (set_attr "bdver1_decode" "double,double")
1880 (set_attr "prefix_rep" "1")
1881 (set_attr "prefix" "maybe_vex")
1882 (set_attr "mode" "SI")])
1883
;; Variant of sse_cvtss2si whose input is a scalar SFmode operand (register
;; or memory) instead of element 0 selected from a V4SF vector; it emits the
;; same cvtss2si instruction.
1884 (define_insn "sse_cvtss2si_2"
1885 [(set (match_operand:SI 0 "register_operand" "=r,r")
1886 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1887 UNSPEC_FIX_NOTRUNC))]
1888 "TARGET_SSE"
1889 "%vcvtss2si\t{%1, %0|%0, %1}"
1890 [(set_attr "type" "sseicvt")
1891 (set_attr "athlon_decode" "double,vector")
1892 (set_attr "amdfam10_decode" "double,double")
1893 (set_attr "bdver1_decode" "double,double")
1894 (set_attr "prefix_rep" "1")
1895 (set_attr "prefix" "maybe_vex")
1896 (set_attr "mode" "SI")])
1897
;; 64-bit counterpart of sse_cvtss2si: convert element 0 of V4SF operand 1
;; to a DImode general register through UNSPEC_FIX_NOTRUNC.  Requires
;; TARGET_64BIT; the template carries a {q} suffix on the mnemonic.
;; NOTE(review): sse_cvtss2siq_2 sets amdfam10_decode but this pattern does
;; not -- confirm whether the omission is intentional.
1898 (define_insn "sse_cvtss2siq"
1899 [(set (match_operand:DI 0 "register_operand" "=r,r")
1900 (unspec:DI
1901 [(vec_select:SF
1902 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1903 (parallel [(const_int 0)]))]
1904 UNSPEC_FIX_NOTRUNC))]
1905 "TARGET_SSE && TARGET_64BIT"
1906 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
1907 [(set_attr "type" "sseicvt")
1908 (set_attr "athlon_decode" "double,vector")
1909 (set_attr "bdver1_decode" "double,double")
1910 (set_attr "prefix_rep" "1")
1911 (set_attr "prefix" "maybe_vex")
1912 (set_attr "mode" "DI")])
1913
;; Variant of sse_cvtss2siq taking a scalar SFmode operand (register or
;; memory) rather than element 0 of a V4SF vector.  Requires TARGET_64BIT.
1914 (define_insn "sse_cvtss2siq_2"
1915 [(set (match_operand:DI 0 "register_operand" "=r,r")
1916 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1917 UNSPEC_FIX_NOTRUNC))]
1918 "TARGET_SSE && TARGET_64BIT"
1919 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
1920 [(set_attr "type" "sseicvt")
1921 (set_attr "athlon_decode" "double,vector")
1922 (set_attr "amdfam10_decode" "double,double")
1923 (set_attr "bdver1_decode" "double,double")
1924 (set_attr "prefix_rep" "1")
1925 (set_attr "prefix" "maybe_vex")
1926 (set_attr "mode" "DI")])
1927
;; cvttss2si: truncating (`fix') conversion of element 0 of V4SF operand 1
;; (register or memory) to an SImode general register.
1928 (define_insn "sse_cvttss2si"
1929 [(set (match_operand:SI 0 "register_operand" "=r,r")
1930 (fix:SI
1931 (vec_select:SF
1932 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1933 (parallel [(const_int 0)]))))]
1934 "TARGET_SSE"
1935 "%vcvttss2si\t{%1, %0|%0, %1}"
1936 [(set_attr "type" "sseicvt")
1937 (set_attr "athlon_decode" "double,vector")
1938 (set_attr "amdfam10_decode" "double,double")
1939 (set_attr "bdver1_decode" "double,double")
1940 (set_attr "prefix_rep" "1")
1941 (set_attr "prefix" "maybe_vex")
1942 (set_attr "mode" "SI")])
1943
;; 64-bit counterpart of sse_cvttss2si: truncating conversion of element 0
;; of V4SF operand 1 to a DImode general register.  Requires TARGET_64BIT.
1944 (define_insn "sse_cvttss2siq"
1945 [(set (match_operand:DI 0 "register_operand" "=r,r")
1946 (fix:DI
1947 (vec_select:SF
1948 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1949 (parallel [(const_int 0)]))))]
1950 "TARGET_SSE && TARGET_64BIT"
1951 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
1952 [(set_attr "type" "sseicvt")
1953 (set_attr "athlon_decode" "double,vector")
1954 (set_attr "amdfam10_decode" "double,double")
1955 (set_attr "bdver1_decode" "double,double")
1956 (set_attr "prefix_rep" "1")
1957 (set_attr "prefix" "maybe_vex")
1958 (set_attr "mode" "DI")])
1959
;; 256-bit vcvtdq2ps: element-wise signed integer to float conversion of
;; V8SI operand 1 (register or memory) into a V8SF register.  AVX only.
1960 (define_insn "avx_cvtdq2ps256"
1961 [(set (match_operand:V8SF 0 "register_operand" "=x")
1962 (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
1963 "TARGET_AVX"
1964 "vcvtdq2ps\t{%1, %0|%0, %1}"
1965 [(set_attr "type" "ssecvt")
1966 (set_attr "prefix" "vex")
1967 (set_attr "mode" "V8SF")])
1968
;; cvtdq2ps: element-wise signed integer to float conversion of V4SI
;; operand 1 (register or memory) into a V4SF register.
1969 (define_insn "sse2_cvtdq2ps"
1970 [(set (match_operand:V4SF 0 "register_operand" "=x")
1971 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1972 "TARGET_SSE2"
1973 "%vcvtdq2ps\t{%1, %0|%0, %1}"
1974 [(set_attr "type" "ssecvt")
1975 (set_attr "prefix" "maybe_vex")
1976 (set_attr "mode" "V4SF")])
1977
;; Unsigned V4SI -> V4SF conversion, expanded into four steps using three
;; fresh V4SF pseudos (operands 5..7):
;;   op5 = (float) op1          signed conversion
;;   op6 = op5 < op3            op3 is the zero vector
;;   op7 = op6 & op4            op4 is a constant vector built from TWO32r
;;   op0 = op5 + op7
;; TWO32r is dconst1 passed through real_ldexp with exponent 32 (presumably
;; 2**32 -- verify against real_ldexp), so elements that converted to a
;; negative value get that constant added back.
;; NOTE(review): the second argument (1) of ix86_build_const_vector
;; presumably requests the value in every element -- confirm in i386.c.
1978 (define_expand "sse2_cvtudq2ps"
1979 [(set (match_dup 5)
1980 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
1981 (set (match_dup 6)
1982 (lt:V4SF (match_dup 5) (match_dup 3)))
1983 (set (match_dup 7)
1984 (and:V4SF (match_dup 6) (match_dup 4)))
1985 (set (match_operand:V4SF 0 "register_operand" "")
1986 (plus:V4SF (match_dup 5) (match_dup 7)))]
1987 "TARGET_SSE2"
1988 {
1989 REAL_VALUE_TYPE TWO32r;
1990 rtx x;
1991 int i;
1992
1993 real_ldexp (&TWO32r, &dconst1, 32);
1994 x = const_double_from_real_value (TWO32r, SFmode);
1995
1996 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
1997 operands[4] = force_reg (V4SFmode,
1998 ix86_build_const_vector (V4SFmode, 1, x));
1999
2000 for (i = 5; i < 8; i++)
2001 operands[i] = gen_reg_rtx (V4SFmode);
2002 })
2003
;; 256-bit vcvtps2dq: convert V8SF operand 1 (register or memory) to V8SI
;; through UNSPEC_FIX_NOTRUNC (the non-`fix' conversion).  AVX only.
2004 (define_insn "avx_cvtps2dq256"
2005 [(set (match_operand:V8SI 0 "register_operand" "=x")
2006 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2007 UNSPEC_FIX_NOTRUNC))]
2008 "TARGET_AVX"
2009 "vcvtps2dq\t{%1, %0|%0, %1}"
2010 [(set_attr "type" "ssecvt")
2011 (set_attr "prefix" "vex")
2012 (set_attr "mode" "OI")])
2013
;; cvtps2dq: convert V4SF operand 1 (register or memory) to V4SI through
;; UNSPEC_FIX_NOTRUNC.  prefix_data16 is "1" for the legacy encoding and
;; left at its default ("*") when TARGET_AVX is set.
2014 (define_insn "sse2_cvtps2dq"
2015 [(set (match_operand:V4SI 0 "register_operand" "=x")
2016 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2017 UNSPEC_FIX_NOTRUNC))]
2018 "TARGET_SSE2"
2019 "%vcvtps2dq\t{%1, %0|%0, %1}"
2020 [(set_attr "type" "ssecvt")
2021 (set (attr "prefix_data16")
2022 (if_then_else
2023 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2024 (const_string "*")
2025 (const_string "1")))
2026 (set_attr "prefix" "maybe_vex")
2027 (set_attr "mode" "TI")])
2028
;; 256-bit vcvttps2dq: truncating (`fix') conversion of V8SF operand 1
;; (register or memory) to V8SI.  AVX only.
2029 (define_insn "avx_cvttps2dq256"
2030 [(set (match_operand:V8SI 0 "register_operand" "=x")
2031 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2032 "TARGET_AVX"
2033 "vcvttps2dq\t{%1, %0|%0, %1}"
2034 [(set_attr "type" "ssecvt")
2035 (set_attr "prefix" "vex")
2036 (set_attr "mode" "OI")])
2037
;; cvttps2dq: truncating (`fix') conversion of V4SF operand 1 (register or
;; memory) to V4SI.
;; Each attribute is set exactly once.  prefix_rep and prefix_data16 are
;; both conditional on TARGET_AVX -- "1" resp. "0" for the legacy encoding,
;; the default ("*") otherwise -- in the same way sse2_cvtps2dq sets
;; prefix_data16.  An extra unconditional prefix_data16 "0" setting that
;; duplicated the conditional one has been dropped.
2038 (define_insn "sse2_cvttps2dq"
2039 [(set (match_operand:V4SI 0 "register_operand" "=x")
2040 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2041 "TARGET_SSE2"
2042 "%vcvttps2dq\t{%1, %0|%0, %1}"
2043 [(set_attr "type" "ssecvt")
2044 (set (attr "prefix_rep")
2045 (if_then_else
2046 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2047 (const_string "*")
2048 (const_string "1")))
2049 (set (attr "prefix_data16")
2050 (if_then_else
2051 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2052 (const_string "*")
2053 (const_string "0")))
2055 (set_attr "prefix" "maybe_vex")
2056 (set_attr "mode" "TI")])
2057
2058 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2059 ;;
2060 ;; Parallel double-precision floating point conversion operations
2061 ;;
2062 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2063
;; cvtpi2pd: convert the two SImode elements of operand 1 to a V2DF
;; register.  Alternative 0 takes operand 1 from a "y" register (unit mmx,
;; prefix_data16 1); alternative 1 takes it from memory and leaves both
;; attributes at their defaults.
2064 (define_insn "sse2_cvtpi2pd"
2065 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2066 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2067 "TARGET_SSE2"
2068 "cvtpi2pd\t{%1, %0|%0, %1}"
2069 [(set_attr "type" "ssecvt")
2070 (set_attr "unit" "mmx,*")
2071 (set_attr "prefix_data16" "1,*")
2072 (set_attr "mode" "V2DF")])
2073
;; cvtpd2pi: convert V2DF operand 1 (register or memory) to a V2SI value in
;; a "y" register through UNSPEC_FIX_NOTRUNC (the non-`fix' conversion).
2074 (define_insn "sse2_cvtpd2pi"
2075 [(set (match_operand:V2SI 0 "register_operand" "=y")
2076 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2077 UNSPEC_FIX_NOTRUNC))]
2078 "TARGET_SSE2"
2079 "cvtpd2pi\t{%1, %0|%0, %1}"
2080 [(set_attr "type" "ssecvt")
2081 (set_attr "unit" "mmx")
2082 (set_attr "bdver1_decode" "double")
2083 (set_attr "prefix_data16" "1")
2084 (set_attr "mode" "DI")])
2085
;; cvttpd2pi: truncating (`fix') conversion of V2DF operand 1 (register or
;; memory) to a V2SI value in a "y" register.
;; NOTE(review): the "mode" attribute is "TI" here but "DI" in
;; sse2_cvtpd2pi, which has the same operand shapes -- confirm which is
;; intended.
2086 (define_insn "sse2_cvttpd2pi"
2087 [(set (match_operand:V2SI 0 "register_operand" "=y")
2088 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2089 "TARGET_SSE2"
2090 "cvttpd2pi\t{%1, %0|%0, %1}"
2091 [(set_attr "type" "ssecvt")
2092 (set_attr "unit" "mmx")
2093 (set_attr "bdver1_decode" "double")
2094 (set_attr "prefix_data16" "1")
2095 (set_attr "mode" "TI")])
2096
;; cvtsi2sd: convert SImode operand 2 to DFmode and insert it into element 0
;; of operand 1 (vec_merge mask 1); element 1 comes from operand 1.
;; Alternatives 0 and 1 (isa "noavx") tie operand 1 to the destination and
;; differ in operand 2 being a register or memory; alternative 2 (isa
;; "avx") is the three-operand form.
2097 (define_insn "sse2_cvtsi2sd"
2098 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2099 (vec_merge:V2DF
2100 (vec_duplicate:V2DF
2101 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2102 (match_operand:V2DF 1 "register_operand" "0,0,x")
2103 (const_int 1)))]
2104 "TARGET_SSE2"
2105 "@
2106 cvtsi2sd\t{%2, %0|%0, %2}
2107 cvtsi2sd\t{%2, %0|%0, %2}
2108 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2109 [(set_attr "isa" "noavx,noavx,avx")
2110 (set_attr "type" "sseicvt")
2111 (set_attr "athlon_decode" "double,direct,*")
2112 (set_attr "amdfam10_decode" "vector,double,*")
2113 (set_attr "bdver1_decode" "double,direct,*")
2114 (set_attr "prefix" "orig,orig,vex")
2115 (set_attr "mode" "DF")])
2116
;; DImode counterpart of sse2_cvtsi2sd, enabled only with TARGET_64BIT:
;; convert DImode operand 2 to DFmode and insert it into element 0 of
;; operand 1.  The noavx alternatives set prefix_rex to 1; the avx
;; alternative sets length_vex to 4 instead.
2117 (define_insn "sse2_cvtsi2sdq"
2118 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2119 (vec_merge:V2DF
2120 (vec_duplicate:V2DF
2121 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2122 (match_operand:V2DF 1 "register_operand" "0,0,x")
2123 (const_int 1)))]
2124 "TARGET_SSE2 && TARGET_64BIT"
2125 "@
2126 cvtsi2sdq\t{%2, %0|%0, %2}
2127 cvtsi2sdq\t{%2, %0|%0, %2}
2128 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2129 [(set_attr "isa" "noavx,noavx,avx")
2130 (set_attr "type" "sseicvt")
2131 (set_attr "athlon_decode" "double,direct,*")
2132 (set_attr "amdfam10_decode" "vector,double,*")
2133 (set_attr "bdver1_decode" "double,direct,*")
2134 (set_attr "length_vex" "*,*,4")
2135 (set_attr "prefix_rex" "1,1,*")
2136 (set_attr "prefix" "orig,orig,vex")
2137 (set_attr "mode" "DF")])
2138
;; cvtsd2si: convert element 0 of V2DF operand 1 (register or memory) to an
;; SImode general register through UNSPEC_FIX_NOTRUNC.
;; NOTE(review): sse2_cvtsd2si_2 emits the same mnemonic and also sets
;; amdfam10_decode; none is given here -- confirm whether the omission is
;; intentional.
2139 (define_insn "sse2_cvtsd2si"
2140 [(set (match_operand:SI 0 "register_operand" "=r,r")
2141 (unspec:SI
2142 [(vec_select:DF
2143 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2144 (parallel [(const_int 0)]))]
2145 UNSPEC_FIX_NOTRUNC))]
2146 "TARGET_SSE2"
2147 "%vcvtsd2si\t{%1, %0|%0, %1}"
2148 [(set_attr "type" "sseicvt")
2149 (set_attr "athlon_decode" "double,vector")
2150 (set_attr "bdver1_decode" "double,double")
2151 (set_attr "prefix_rep" "1")
2152 (set_attr "prefix" "maybe_vex")
2153 (set_attr "mode" "SI")])
2154
;; Variant of sse2_cvtsd2si whose input is a scalar DFmode operand
;; (register or memory) instead of element 0 selected from a V2DF vector.
2155 (define_insn "sse2_cvtsd2si_2"
2156 [(set (match_operand:SI 0 "register_operand" "=r,r")
2157 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2158 UNSPEC_FIX_NOTRUNC))]
2159 "TARGET_SSE2"
2160 "%vcvtsd2si\t{%1, %0|%0, %1}"
2161 [(set_attr "type" "sseicvt")
2162 (set_attr "athlon_decode" "double,vector")
2163 (set_attr "amdfam10_decode" "double,double")
2164 (set_attr "bdver1_decode" "double,double")
2165 (set_attr "prefix_rep" "1")
2166 (set_attr "prefix" "maybe_vex")
2167 (set_attr "mode" "SI")])
2168
;; 64-bit counterpart of sse2_cvtsd2si: convert element 0 of V2DF operand 1
;; to a DImode general register through UNSPEC_FIX_NOTRUNC.  Requires
;; TARGET_64BIT; the template carries a {q} suffix on the mnemonic.
;; NOTE(review): sse2_cvtsd2siq_2 sets amdfam10_decode but this pattern
;; does not -- confirm whether the omission is intentional.
2169 (define_insn "sse2_cvtsd2siq"
2170 [(set (match_operand:DI 0 "register_operand" "=r,r")
2171 (unspec:DI
2172 [(vec_select:DF
2173 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2174 (parallel [(const_int 0)]))]
2175 UNSPEC_FIX_NOTRUNC))]
2176 "TARGET_SSE2 && TARGET_64BIT"
2177 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2178 [(set_attr "type" "sseicvt")
2179 (set_attr "athlon_decode" "double,vector")
2180 (set_attr "bdver1_decode" "double,double")
2181 (set_attr "prefix_rep" "1")
2182 (set_attr "prefix" "maybe_vex")
2183 (set_attr "mode" "DI")])
2184
;; Variant of sse2_cvtsd2siq taking a scalar DFmode operand (register or
;; memory) rather than element 0 of a V2DF vector.  Requires TARGET_64BIT.
2185 (define_insn "sse2_cvtsd2siq_2"
2186 [(set (match_operand:DI 0 "register_operand" "=r,r")
2187 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2188 UNSPEC_FIX_NOTRUNC))]
2189 "TARGET_SSE2 && TARGET_64BIT"
2190 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2191 [(set_attr "type" "sseicvt")
2192 (set_attr "athlon_decode" "double,vector")
2193 (set_attr "amdfam10_decode" "double,double")
2194 (set_attr "bdver1_decode" "double,double")
2195 (set_attr "prefix_rep" "1")
2196 (set_attr "prefix" "maybe_vex")
2197 (set_attr "mode" "DI")])
2198
;; cvttsd2si: truncating (`fix') conversion of element 0 of V2DF operand 1
;; (register or memory) to an SImode general register.
2199 (define_insn "sse2_cvttsd2si"
2200 [(set (match_operand:SI 0 "register_operand" "=r,r")
2201 (fix:SI
2202 (vec_select:DF
2203 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2204 (parallel [(const_int 0)]))))]
2205 "TARGET_SSE2"
2206 "%vcvttsd2si\t{%1, %0|%0, %1}"
2207 [(set_attr "type" "sseicvt")
2208 (set_attr "athlon_decode" "double,vector")
2209 (set_attr "amdfam10_decode" "double,double")
2210 (set_attr "bdver1_decode" "double,double")
2211 (set_attr "prefix_rep" "1")
2212 (set_attr "prefix" "maybe_vex")
2213 (set_attr "mode" "SI")])
2214
;; 64-bit counterpart of sse2_cvttsd2si: truncating conversion of element 0
;; of V2DF operand 1 to a DImode general register.  Requires TARGET_64BIT.
2215 (define_insn "sse2_cvttsd2siq"
2216 [(set (match_operand:DI 0 "register_operand" "=r,r")
2217 (fix:DI
2218 (vec_select:DF
2219 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2220 (parallel [(const_int 0)]))))]
2221 "TARGET_SSE2 && TARGET_64BIT"
2222 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2223 [(set_attr "type" "sseicvt")
2224 (set_attr "athlon_decode" "double,vector")
2225 (set_attr "amdfam10_decode" "double,double")
2226 (set_attr "bdver1_decode" "double,double")
2227 (set_attr "prefix_rep" "1")
2228 (set_attr "prefix" "maybe_vex")
2229 (set_attr "mode" "DI")])
2230
;; 256-bit vcvtdq2pd: convert all four SImode elements of V4SI operand 1
;; (register or memory) to a V4DF register.  AVX only.
2231 (define_insn "avx_cvtdq2pd256"
2232 [(set (match_operand:V4DF 0 "register_operand" "=x")
2233 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2234 "TARGET_AVX"
2235 "vcvtdq2pd\t{%1, %0|%0, %1}"
2236 [(set_attr "type" "ssecvt")
2237 (set_attr "prefix" "vex")
2238 (set_attr "mode" "V4DF")])
2239
;; Unnamed-for-generation ("*") variant of avx_cvtdq2pd256 whose source is
;; elements 0..3 selected from a V8SI operand.  The template prints
;; operand 1 with the %x modifier.
2240 (define_insn "*avx_cvtdq2pd256_2"
2241 [(set (match_operand:V4DF 0 "register_operand" "=x")
2242 (float:V4DF
2243 (vec_select:V4SI
2244 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2245 (parallel [(const_int 0) (const_int 1)
2246 (const_int 2) (const_int 3)]))))]
2247 "TARGET_AVX"
2248 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2249 [(set_attr "type" "ssecvt")
2250 (set_attr "prefix" "vex")
2251 (set_attr "mode" "V4DF")])
2252
;; cvtdq2pd: convert elements 0 and 1 of V4SI operand 1 (register or
;; memory) to a V2DF register.
2253 (define_insn "sse2_cvtdq2pd"
2254 [(set (match_operand:V2DF 0 "register_operand" "=x")
2255 (float:V2DF
2256 (vec_select:V2SI
2257 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2258 (parallel [(const_int 0) (const_int 1)]))))]
2259 "TARGET_SSE2"
2260 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2261 [(set_attr "type" "ssecvt")
2262 (set_attr "prefix" "maybe_vex")
2263 (set_attr "mode" "V2DF")])
2264
;; 256-bit vcvtpd2dq: convert V4DF operand 1 (register or memory) to a V4SI
;; register through UNSPEC_FIX_NOTRUNC.  The template carries a {y} suffix
;; on the mnemonic.  AVX only.
2265 (define_insn "avx_cvtpd2dq256"
2266 [(set (match_operand:V4SI 0 "register_operand" "=x")
2267 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2268 UNSPEC_FIX_NOTRUNC))]
2269 "TARGET_AVX"
2270 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2271 [(set_attr "type" "ssecvt")
2272 (set_attr "prefix" "vex")
2273 (set_attr "mode" "OI")])
2274
;; Expander for cvtpd2dq: the V4SI result is the concatenation of the V2SI
;; conversion of V2DF operand 1 (UNSPEC_FIX_NOTRUNC) with operand 2, which
;; the preparation statement sets to the V2SI zero vector.  The generated
;; RTL has the shape matched by *sse2_cvtpd2dq.
2275 (define_expand "sse2_cvtpd2dq"
2276 [(set (match_operand:V4SI 0 "register_operand" "")
2277 (vec_concat:V4SI
2278 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2279 UNSPEC_FIX_NOTRUNC)
2280 (match_dup 2)))]
2281 "TARGET_SSE2"
2282 "operands[2] = CONST0_RTX (V2SImode);")
2283
;; Insn behind the sse2_cvtpd2dq expander: V2DF operand 1 is converted to
;; V2SI (UNSPEC_FIX_NOTRUNC) and concatenated with operand 2, which must be
;; a zero constant (const0_operand).  The output code picks the template at
;; output time: the "v"-prefixed mnemonic with an {x} suffix when TARGET_AVX
;; is set, the plain mnemonic otherwise.
2284 (define_insn "*sse2_cvtpd2dq"
2285 [(set (match_operand:V4SI 0 "register_operand" "=x")
2286 (vec_concat:V4SI
2287 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2288 UNSPEC_FIX_NOTRUNC)
2289 (match_operand:V2SI 2 "const0_operand" "")))]
2290 "TARGET_SSE2"
2291 {
2292 if (TARGET_AVX)
2293 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2294 else
2295 return "cvtpd2dq\t{%1, %0|%0, %1}";
2296 }
2297 [(set_attr "type" "ssecvt")
2298 (set_attr "prefix_rep" "1")
2299 (set_attr "prefix_data16" "0")
2300 (set_attr "prefix" "maybe_vex")
2301 (set_attr "mode" "TI")
2302 (set_attr "amdfam10_decode" "double")
2303 (set_attr "athlon_decode" "vector")
2304 (set_attr "bdver1_decode" "double")])
2305
;; 256-bit vcvttpd2dq: truncating (`fix') conversion of V4DF operand 1
;; (register or memory) to a V4SI register.  The template carries a {y}
;; suffix on the mnemonic.  AVX only.
2306 (define_insn "avx_cvttpd2dq256"
2307 [(set (match_operand:V4SI 0 "register_operand" "=x")
2308 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2309 "TARGET_AVX"
2310 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2311 [(set_attr "type" "ssecvt")
2312 (set_attr "prefix" "vex")
2313 (set_attr "mode" "OI")])
2314
;; Expander for cvttpd2dq: the V4SI result is the concatenation of the
;; truncating V2SI conversion of V2DF operand 1 with operand 2, which the
;; preparation statement sets to the V2SI zero vector.  The generated RTL
;; has the shape matched by *sse2_cvttpd2dq.
2315 (define_expand "sse2_cvttpd2dq"
2316 [(set (match_operand:V4SI 0 "register_operand" "")
2317 (vec_concat:V4SI
2318 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2319 (match_dup 2)))]
2320 "TARGET_SSE2"
2321 "operands[2] = CONST0_RTX (V2SImode);")
2322
;; Insn behind the sse2_cvttpd2dq expander: V2DF operand 1 is converted to
;; V2SI by truncation (`fix') and concatenated with operand 2, which must be
;; a zero constant (const0_operand).  The output code returns the
;; "v"-prefixed template with an {x} suffix when TARGET_AVX is set and the
;; plain mnemonic otherwise.
2323 (define_insn "*sse2_cvttpd2dq"
2324 [(set (match_operand:V4SI 0 "register_operand" "=x")
2325 (vec_concat:V4SI
2326 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2327 (match_operand:V2SI 2 "const0_operand" "")))]
2328 "TARGET_SSE2"
2329 {
2330 if (TARGET_AVX)
2331 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2332 else
2333 return "cvttpd2dq\t{%1, %0|%0, %1}";
2334 }
2335 [(set_attr "type" "ssecvt")
2336 (set_attr "amdfam10_decode" "double")
2337 (set_attr "athlon_decode" "vector")
2338 (set_attr "bdver1_decode" "double")
2339 (set_attr "prefix" "maybe_vex")
2340 (set_attr "mode" "TI")])
2341
;; cvtsd2ss: narrow V2DF operand 2 with float_truncate and merge element 0
;; of the result into V4SF operand 1 (vec_merge mask 1).  Alternatives 0
;; and 1 (isa "noavx") tie operand 1 to the destination and differ in
;; operand 2 being a register or memory; alternative 2 (isa "avx") is the
;; three-operand form.
2342 (define_insn "sse2_cvtsd2ss"
2343 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2344 (vec_merge:V4SF
2345 (vec_duplicate:V4SF
2346 (float_truncate:V2SF
2347 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2348 (match_operand:V4SF 1 "register_operand" "0,0,x")
2349 (const_int 1)))]
2350 "TARGET_SSE2"
2351 "@
2352 cvtsd2ss\t{%2, %0|%0, %2}
2353 cvtsd2ss\t{%2, %0|%0, %2}
2354 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2355 [(set_attr "isa" "noavx,noavx,avx")
2356 (set_attr "type" "ssecvt")
2357 (set_attr "athlon_decode" "vector,double,*")
2358 (set_attr "amdfam10_decode" "vector,double,*")
2359 (set_attr "bdver1_decode" "direct,direct,*")
2360 (set_attr "prefix" "orig,orig,vex")
2361 (set_attr "mode" "SF")])
2362
;; cvtss2sd: widen elements 0 and 1 of V4SF operand 2 with float_extend and
;; merge element 0 of the result into V2DF operand 1 (vec_merge mask 1).
;; Alternatives 0 and 1 (isa "noavx") tie operand 1 to the destination and
;; differ in operand 2 being a register or memory; alternative 2 (isa
;; "avx") is the three-operand form.
2363 (define_insn "sse2_cvtss2sd"
2364 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2365 (vec_merge:V2DF
2366 (float_extend:V2DF
2367 (vec_select:V2SF
2368 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2369 (parallel [(const_int 0) (const_int 1)])))
2370 (match_operand:V2DF 1 "register_operand" "0,0,x")
2371 (const_int 1)))]
2372 "TARGET_SSE2"
2373 "@
2374 cvtss2sd\t{%2, %0|%0, %2}
2375 cvtss2sd\t{%2, %0|%0, %2}
2376 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2377 [(set_attr "isa" "noavx,noavx,avx")
2378 (set_attr "type" "ssecvt")
2379 (set_attr "amdfam10_decode" "vector,double,*")
2380 (set_attr "athlon_decode" "direct,direct,*")
2381 (set_attr "bdver1_decode" "direct,direct,*")
2382 (set_attr "prefix" "orig,orig,vex")
2383 (set_attr "mode" "DF")])
2384
;; 256-bit vcvtpd2ps: narrow V4DF operand 1 (register or memory) to a V4SF
;; register with float_truncate.  The template carries a {y} suffix on the
;; mnemonic.  AVX only.
2385 (define_insn "avx_cvtpd2ps256"
2386 [(set (match_operand:V4SF 0 "register_operand" "=x")
2387 (float_truncate:V4SF
2388 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2389 "TARGET_AVX"
2390 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2391 [(set_attr "type" "ssecvt")
2392 (set_attr "prefix" "vex")
2393 (set_attr "mode" "V4SF")])
2394
;; Expander for cvtpd2ps: the V4SF result is the concatenation of the V2SF
;; float_truncate of V2DF operand 1 with operand 2, which the preparation
;; statement sets to the V2SF zero vector.  The generated RTL has the shape
;; matched by *sse2_cvtpd2ps.
2395 (define_expand "sse2_cvtpd2ps"
2396 [(set (match_operand:V4SF 0 "register_operand" "")
2397 (vec_concat:V4SF
2398 (float_truncate:V2SF
2399 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2400 (match_dup 2)))]
2401 "TARGET_SSE2"
2402 "operands[2] = CONST0_RTX (V2SFmode);")
2403
;; Insn behind the sse2_cvtpd2ps expander: V2DF operand 1 is narrowed to
;; V2SF and concatenated with operand 2, which must be a zero constant
;; (const0_operand).  The output code returns the "v"-prefixed template with
;; an {x} suffix when TARGET_AVX is set and the plain mnemonic otherwise.
2404 (define_insn "*sse2_cvtpd2ps"
2405 [(set (match_operand:V4SF 0 "register_operand" "=x")
2406 (vec_concat:V4SF
2407 (float_truncate:V2SF
2408 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2409 (match_operand:V2SF 2 "const0_operand" "")))]
2410 "TARGET_SSE2"
2411 {
2412 if (TARGET_AVX)
2413 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2414 else
2415 return "cvtpd2ps\t{%1, %0|%0, %1}";
2416 }
2417 [(set_attr "type" "ssecvt")
2418 (set_attr "amdfam10_decode" "double")
2419 (set_attr "athlon_decode" "vector")
2420 (set_attr "bdver1_decode" "double")
2421 (set_attr "prefix_data16" "1")
2422 (set_attr "prefix" "maybe_vex")
2423 (set_attr "mode" "V4SF")])
2424
;; 256-bit vcvtps2pd: widen all four elements of V4SF operand 1 (register
;; or memory) to a V4DF register with float_extend.  AVX only.
2425 (define_insn "avx_cvtps2pd256"
2426 [(set (match_operand:V4DF 0 "register_operand" "=x")
2427 (float_extend:V4DF
2428 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2429 "TARGET_AVX"
2430 "vcvtps2pd\t{%1, %0|%0, %1}"
2431 [(set_attr "type" "ssecvt")
2432 (set_attr "prefix" "vex")
2433 (set_attr "mode" "V4DF")])
2434
;; Variant of avx_cvtps2pd256 whose source is elements 0..3 selected from a
;; V8SF operand.  The template prints operand 1 with the %x modifier.
2435 (define_insn "*avx_cvtps2pd256_2"
2436 [(set (match_operand:V4DF 0 "register_operand" "=x")
2437 (float_extend:V4DF
2438 (vec_select:V4SF
2439 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2440 (parallel [(const_int 0) (const_int 1)
2441 (const_int 2) (const_int 3)]))))]
2442 "TARGET_AVX"
2443 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2444 [(set_attr "type" "ssecvt")
2445 (set_attr "prefix" "vex")
2446 (set_attr "mode" "V4DF")])
2447
;; cvtps2pd: widen elements 0 and 1 of V4SF operand 1 (register or memory)
;; to a V2DF register with float_extend.  prefix_data16 is explicitly "0".
2448 (define_insn "sse2_cvtps2pd"
2449 [(set (match_operand:V2DF 0 "register_operand" "=x")
2450 (float_extend:V2DF
2451 (vec_select:V2SF
2452 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2453 (parallel [(const_int 0) (const_int 1)]))))]
2454 "TARGET_SSE2"
2455 "%vcvtps2pd\t{%1, %0|%0, %1}"
2456 [(set_attr "type" "ssecvt")
2457 (set_attr "amdfam10_decode" "direct")
2458 (set_attr "athlon_decode" "double")
2459 (set_attr "bdver1_decode" "double")
2460 (set_attr "prefix_data16" "0")
2461 (set_attr "prefix" "maybe_vex")
2462 (set_attr "mode" "V2DF")])
2463
;; Widen the high two SFmode elements of V4SF operand 1 to V2DF.
;; Step 1 has the same select shape as sse_movhlps: from the concatenation
;; of scratch operand 2 (elements 0..3) and operand 1 (elements 4..7) it
;; picks 6, 7, 2, 3, so elements 2 and 3 of operand 1 land in elements 0
;; and 1 of the scratch.  Step 2 float_extends elements 0 and 1 of the
;; scratch.  Operand 2 is a fresh V4SF pseudo that is both read and written
;; by step 1.
2464 (define_expand "vec_unpacks_hi_v4sf"
2465 [(set (match_dup 2)
2466 (vec_select:V4SF
2467 (vec_concat:V8SF
2468 (match_dup 2)
2469 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2470 (parallel [(const_int 6) (const_int 7)
2471 (const_int 2) (const_int 3)])))
2472 (set (match_operand:V2DF 0 "register_operand" "")
2473 (float_extend:V2DF
2474 (vec_select:V2SF
2475 (match_dup 2)
2476 (parallel [(const_int 0) (const_int 1)]))))]
2477 "TARGET_SSE2"
2478 "operands[2] = gen_reg_rtx (V4SFmode);")
2479
;; Widen the high four SFmode elements of V8SF operand 1 to V4DF: select
;; elements 4..7 into a fresh V4SF pseudo (operand 2), then float_extend
;; that pseudo.  AVX only.
2480 (define_expand "vec_unpacks_hi_v8sf"
2481 [(set (match_dup 2)
2482 (vec_select:V4SF
2483 (match_operand:V8SF 1 "nonimmediate_operand" "")
2484 (parallel [(const_int 4) (const_int 5)
2485 (const_int 6) (const_int 7)])))
2486 (set (match_operand:V4DF 0 "register_operand" "")
2487 (float_extend:V4DF
2488 (match_dup 2)))]
2489 "TARGET_AVX"
2490 "operands[2] = gen_reg_rtx (V4SFmode);")
2491
;; Widen the low two SFmode elements of V4SF operand 1 to V2DF.  The RTL
;; has the same shape as the sse2_cvtps2pd insn pattern; there is no
;; preparation code.
2492 (define_expand "vec_unpacks_lo_v4sf"
2493 [(set (match_operand:V2DF 0 "register_operand" "")
2494 (float_extend:V2DF
2495 (vec_select:V2SF
2496 (match_operand:V4SF 1 "nonimmediate_operand" "")
2497 (parallel [(const_int 0) (const_int 1)]))))]
2498 "TARGET_SSE2")
2499
;; Widen the low four SFmode elements of V8SF operand 1 to V4DF.  The RTL
;; has the same shape as the *avx_cvtps2pd256_2 insn pattern; there is no
;; preparation code.  AVX only.
2500 (define_expand "vec_unpacks_lo_v8sf"
2501 [(set (match_operand:V4DF 0 "register_operand" "")
2502 (float_extend:V4DF
2503 (vec_select:V4SF
2504 (match_operand:V8SF 1 "nonimmediate_operand" "")
2505 (parallel [(const_int 0) (const_int 1)
2506 (const_int 2) (const_int 3)]))))]
2507 "TARGET_AVX")
2508
;; V8HI -> V4SF conversion of one half of operand 1, done in two emitted
;; insns: gen_vec_unpacks_hi_v8hi produces a V4SI temporary from operand 1
;; (presumably the sign-extended high half -- that expander is defined
;; elsewhere), and gen_sse2_cvtdq2ps converts the temporary to float into
;; operand 0.
2509 (define_expand "vec_unpacks_float_hi_v8hi"
2510 [(match_operand:V4SF 0 "register_operand" "")
2511 (match_operand:V8HI 1 "register_operand" "")]
2512 "TARGET_SSE2"
2513 {
2514 rtx tmp = gen_reg_rtx (V4SImode);
2515
2516 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2517 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
2518 DONE;
2519 })
2520
;; V8HI -> V4SF conversion of one half of operand 1, done in two emitted
;; insns: gen_vec_unpacks_lo_v8hi produces a V4SI temporary from operand 1
;; (presumably the sign-extended low half -- that expander is defined
;; elsewhere), and gen_sse2_cvtdq2ps converts the temporary to float into
;; operand 0.
2521 (define_expand "vec_unpacks_float_lo_v8hi"
2522 [(match_operand:V4SF 0 "register_operand" "")
2523 (match_operand:V8HI 1 "register_operand" "")]
2524 "TARGET_SSE2"
2525 {
2526 rtx tmp = gen_reg_rtx (V4SImode);
2527
2528 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2529 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
2530 DONE;
2531 })
2532
;; Unsigned-named counterpart of vec_unpacks_float_hi_v8hi: the V4SI
;; temporary comes from gen_vec_unpacku_hi_v8hi (presumably the
;; zero-extended high half -- defined elsewhere) and is then converted with
;; the signed gen_sse2_cvtdq2ps.
2533 (define_expand "vec_unpacku_float_hi_v8hi"
2534 [(match_operand:V4SF 0 "register_operand" "")
2535 (match_operand:V8HI 1 "register_operand" "")]
2536 "TARGET_SSE2"
2537 {
2538 rtx tmp = gen_reg_rtx (V4SImode);
2539
2540 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2541 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
2542 DONE;
2543 })
2544
;; Unsigned-named counterpart of vec_unpacks_float_lo_v8hi: the V4SI
;; temporary comes from gen_vec_unpacku_lo_v8hi (presumably the
;; zero-extended low half -- defined elsewhere) and is then converted with
;; the signed gen_sse2_cvtdq2ps.
2545 (define_expand "vec_unpacku_float_lo_v8hi"
2546 [(match_operand:V4SF 0 "register_operand" "")
2547 (match_operand:V8HI 1 "register_operand" "")]
2548 "TARGET_SSE2"
2549 {
2550 rtx tmp = gen_reg_rtx (V4SImode);
2551
2552 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2553 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
2554 DONE;
2555 })
2556
;; Convert the high two SImode elements of V4SI operand 1 to V2DF.
;; Step 1 selects elements 2, 3, 2, 3 of operand 1 into a fresh V4SI pseudo
;; (operand 2), putting the high pair in elements 0 and 1.  Step 2 converts
;; elements 0 and 1 of the pseudo, the shape matched by sse2_cvtdq2pd.
2557 (define_expand "vec_unpacks_float_hi_v4si"
2558 [(set (match_dup 2)
2559 (vec_select:V4SI
2560 (match_operand:V4SI 1 "nonimmediate_operand" "")
2561 (parallel [(const_int 2) (const_int 3)
2562 (const_int 2) (const_int 3)])))
2563 (set (match_operand:V2DF 0 "register_operand" "")
2564 (float:V2DF
2565 (vec_select:V2SI
2566 (match_dup 2)
2567 (parallel [(const_int 0) (const_int 1)]))))]
2568 "TARGET_SSE2"
2569 "operands[2] = gen_reg_rtx (V4SImode);")
2570
;; Convert the low two SImode elements of V4SI operand 1 to V2DF.  The RTL
;; has the same shape as the sse2_cvtdq2pd insn pattern; there is no
;; preparation code.
2571 (define_expand "vec_unpacks_float_lo_v4si"
2572 [(set (match_operand:V2DF 0 "register_operand" "")
2573 (float:V2DF
2574 (vec_select:V2SI
2575 (match_operand:V4SI 1 "nonimmediate_operand" "")
2576 (parallel [(const_int 0) (const_int 1)]))))]
2577 "TARGET_SSE2")
2578
;; Convert the high four SImode elements of V8SI operand 1 to V4DF: select
;; elements 4..7 into a fresh V4SI pseudo (operand 2), then convert the
;; pseudo with `float'.  AVX only.
2579 (define_expand "vec_unpacks_float_hi_v8si"
2580 [(set (match_dup 2)
2581 (vec_select:V4SI
2582 (match_operand:V8SI 1 "nonimmediate_operand" "")
2583 (parallel [(const_int 4) (const_int 5)
2584 (const_int 6) (const_int 7)])))
2585 (set (match_operand:V4DF 0 "register_operand" "")
2586 (float:V4DF
2587 (match_dup 2)))]
2588 "TARGET_AVX"
2589 "operands[2] = gen_reg_rtx (V4SImode);")
2590
;; Convert the low four SImode elements of V8SI operand 1 to V4DF.  The RTL
;; has the same shape as the *avx_cvtdq2pd256_2 insn pattern; there is no
;; preparation code.  AVX only.
2591 (define_expand "vec_unpacks_float_lo_v8si"
2592 [(set (match_operand:V4DF 0 "register_operand" "")
2593 (float:V4DF
2594 (vec_select:V4SI
2595 (match_operand:V8SI 1 "nonimmediate_operand" "")
2596 (parallel [(const_int 0) (const_int 1)
2597 (const_int 2) (const_int 3)]))))]
2598 "TARGET_AVX")
2599
;; Unsigned conversion of the high two SImode elements of V4SI operand 1 to
;; V2DF, in five steps:
;;   op5 = elements 2,3,2,3 of op1     (fresh V4SI pseudo)
;;   op6 = (float) elements 0,1 of op5 signed conversion
;;   op7 = op6 < op3                   op3 is the V2DF zero vector
;;   op8 = op7 & op4                   op4 is a constant built from TWO32r
;;   op0 = op6 + op8
;; TWO32r is dconst1 passed through real_ldexp with exponent 32 (presumably
;; 2**32 -- verify against real_ldexp), so elements that converted to a
;; negative value get that constant added back.  Operands 6..8 are fresh
;; V2DF pseudos.
2600 (define_expand "vec_unpacku_float_hi_v4si"
2601 [(set (match_dup 5)
2602 (vec_select:V4SI
2603 (match_operand:V4SI 1 "nonimmediate_operand" "")
2604 (parallel [(const_int 2) (const_int 3)
2605 (const_int 2) (const_int 3)])))
2606 (set (match_dup 6)
2607 (float:V2DF
2608 (vec_select:V2SI
2609 (match_dup 5)
2610 (parallel [(const_int 0) (const_int 1)]))))
2611 (set (match_dup 7)
2612 (lt:V2DF (match_dup 6) (match_dup 3)))
2613 (set (match_dup 8)
2614 (and:V2DF (match_dup 7) (match_dup 4)))
2615 (set (match_operand:V2DF 0 "register_operand" "")
2616 (plus:V2DF (match_dup 6) (match_dup 8)))]
2617 "TARGET_SSE2"
2618 {
2619 REAL_VALUE_TYPE TWO32r;
2620 rtx x;
2621 int i;
2622
2623 real_ldexp (&TWO32r, &dconst1, 32);
2624 x = const_double_from_real_value (TWO32r, DFmode);
2625
2626 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2627 operands[4] = force_reg (V2DFmode,
2628 ix86_build_const_vector (V2DFmode, 1, x));
2629
2630 operands[5] = gen_reg_rtx (V4SImode);
2631
2632 for (i = 6; i < 9; i++)
2633 operands[i] = gen_reg_rtx (V2DFmode);
2634 })
2635
;; Unsigned conversion of the low two SImode elements of V4SI operand 1 to
;; V2DF, in four steps:
;;   op5 = (float) elements 0,1 of op1 signed conversion
;;   op6 = op5 < op3                   op3 is the V2DF zero vector
;;   op7 = op6 & op4                   op4 is a constant built from TWO32r
;;   op0 = op5 + op7
;; TWO32r is dconst1 passed through real_ldexp with exponent 32 (presumably
;; 2**32 -- verify against real_ldexp), so elements that converted to a
;; negative value get that constant added back.  Operands 5..7 are fresh
;; V2DF pseudos.
2636 (define_expand "vec_unpacku_float_lo_v4si"
2637 [(set (match_dup 5)
2638 (float:V2DF
2639 (vec_select:V2SI
2640 (match_operand:V4SI 1 "nonimmediate_operand" "")
2641 (parallel [(const_int 0) (const_int 1)]))))
2642 (set (match_dup 6)
2643 (lt:V2DF (match_dup 5) (match_dup 3)))
2644 (set (match_dup 7)
2645 (and:V2DF (match_dup 6) (match_dup 4)))
2646 (set (match_operand:V2DF 0 "register_operand" "")
2647 (plus:V2DF (match_dup 5) (match_dup 7)))]
2648 "TARGET_SSE2"
2649 {
2650 REAL_VALUE_TYPE TWO32r;
2651 rtx x;
2652 int i;
2653
2654 real_ldexp (&TWO32r, &dconst1, 32);
2655 x = const_double_from_real_value (TWO32r, DFmode);
2656
2657 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2658 operands[4] = force_reg (V2DFmode,
2659 ix86_build_const_vector (V2DFmode, 1, x));
2660
2661 for (i = 5; i < 8; i++)
2662 operands[i] = gen_reg_rtx (V2DFmode);
2663 })
2664
;; Pack two V4DF vectors into one V8SF: each of operands 1 and 2 is
;; narrowed to V4SF with float_truncate into a fresh pseudo (operands 3 and
;; 4), and the two pseudos are concatenated, operand 1's half first.
;; AVX only.
2665 (define_expand "vec_pack_trunc_v4df"
2666 [(set (match_dup 3)
2667 (float_truncate:V4SF
2668 (match_operand:V4DF 1 "nonimmediate_operand" "")))
2669 (set (match_dup 4)
2670 (float_truncate:V4SF
2671 (match_operand:V4DF 2 "nonimmediate_operand" "")))
2672 (set (match_operand:V8SF 0 "register_operand" "")
2673 (vec_concat:V8SF
2674 (match_dup 3)
2675 (match_dup 4)))]
2676 "TARGET_AVX"
2677 {
2678 operands[3] = gen_reg_rtx (V4SFmode);
2679 operands[4] = gen_reg_rtx (V4SFmode);
2680 })
2681
;; Pack two V2DF vectors into one V4SF.  Each input goes through
;; gen_sse2_cvtpd2ps, whose pattern leaves the two narrowed values in
;; elements 0 and 1 of a V4SF temporary with the upper half zero.
;; gen_sse_movlhps then builds operand 0 from elements 0 and 1 of r1
;; followed by elements 0 and 1 of r2 (select indices 0, 1, 4, 5).
2682 (define_expand "vec_pack_trunc_v2df"
2683 [(match_operand:V4SF 0 "register_operand" "")
2684 (match_operand:V2DF 1 "nonimmediate_operand" "")
2685 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2686 "TARGET_SSE2"
2687 {
2688 rtx r1, r2;
2689
2690 r1 = gen_reg_rtx (V4SFmode);
2691 r2 = gen_reg_rtx (V4SFmode);
2692
2693 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2694 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2695 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
2696 DONE;
2697 })
2698
;; Pack the truncating integer conversions of two V2DF vectors into one
;; V4SI.  Each input goes through gen_sse2_cvttpd2dq, whose pattern leaves
;; the two converted values in the low half of a V4SI temporary with the
;; upper half zero.  The temporaries and the destination are then viewed as
;; V2DI (gen_lowpart) and combined with gen_vec_interleave_lowv2di
;; (presumably placing the low 64 bits of r1 and r2 side by side -- that
;; expander is defined elsewhere).
2699 (define_expand "vec_pack_sfix_trunc_v2df"
2700 [(match_operand:V4SI 0 "register_operand" "")
2701 (match_operand:V2DF 1 "nonimmediate_operand" "")
2702 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2703 "TARGET_SSE2"
2704 {
2705 rtx r1, r2;
2706
2707 r1 = gen_reg_rtx (V4SImode);
2708 r2 = gen_reg_rtx (V4SImode);
2709
2710 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2711 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2712 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2713 gen_lowpart (V2DImode, r1),
2714 gen_lowpart (V2DImode, r2)));
2715 DONE;
2716 })
2717
;; Same as vec_pack_sfix_trunc_v2df but using gen_sse2_cvtpd2dq, i.e. the
;; UNSPEC_FIX_NOTRUNC conversion instead of the truncating one.  The two
;; V4SI temporaries are viewed as V2DI (gen_lowpart) and combined into
;; operand 0 with gen_vec_interleave_lowv2di (presumably placing the low
;; 64 bits of r1 and r2 side by side -- that expander is defined elsewhere).
2718 (define_expand "vec_pack_sfix_v2df"
2719 [(match_operand:V4SI 0 "register_operand" "")
2720 (match_operand:V2DF 1 "nonimmediate_operand" "")
2721 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2722 "TARGET_SSE2"
2723 {
2724 rtx r1, r2;
2725
2726 r1 = gen_reg_rtx (V4SImode);
2727 r2 = gen_reg_rtx (V4SImode);
2728
2729 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2730 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2731 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2732 gen_lowpart (V2DImode, r1),
2733 gen_lowpart (V2DImode, r2)));
2734 DONE;
2735 })
2736
2737 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2738 ;;
2739 ;; Parallel single-precision floating point element swizzling
2740 ;;
2741 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2742
;; Expander wrapper around the sse_movhlps insn.  The operands are first
;; passed through ix86_fixup_binary_operands, which returns the destination
;; to use (presumably legitimizing operand combinations the insn cannot
;; accept -- confirm in i386.c).  sse_movhlps is emitted on that
;; destination, and if it differs from operand 0 the result is copied to
;; operand 0 afterwards.
2743 (define_expand "sse_movhlps_exp"
2744 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2745 (vec_select:V4SF
2746 (vec_concat:V8SF
2747 (match_operand:V4SF 1 "nonimmediate_operand" "")
2748 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2749 (parallel [(const_int 6)
2750 (const_int 7)
2751 (const_int 2)
2752 (const_int 3)])))]
2753 "TARGET_SSE"
2754 {
2755 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
2756
2757 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
2758
2759 /* Fix up the destination if needed. */
2760 if (dst != operands[0])
2761 emit_move_insn (operands[0], dst);
2762
2763 DONE;
2764 })
2765
;; Result = { op2[2], op2[3], op1[2], op1[3] } (select indices 6, 7, 2, 3 of
;; the concatenation op1:op2).  The insn condition rejects the case where
;; operands 1 and 2 are both memory.  Alternatives:
;;   0  noavx  register operands, operand 1 tied to the destination: movhlps
;;   1  avx    register operands, three-operand form: vmovhlps
;;   2  noavx  operand 2 in offsettable memory ("o"): movlps from %H2
;;   3  avx    as 2, three-operand form: vmovlps from %H2
;;   4  base   destination in memory with operand 1 tied to it, operand 2 a
;;             register: %vmovhps store of operand 2 into operand 0
;; NOTE(review): %H2 presumably addresses the upper 8 bytes of operand 2,
;; which is why "o" rather than "m" is required -- confirm in print_operand.
2766 (define_insn "sse_movhlps"
2767 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
2768 (vec_select:V4SF
2769 (vec_concat:V8SF
2770 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
2771 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
2772 (parallel [(const_int 6)
2773 (const_int 7)
2774 (const_int 2)
2775 (const_int 3)])))]
2776 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2777 "@
2778 movhlps\t{%2, %0|%0, %2}
2779 vmovhlps\t{%2, %1, %0|%0, %1, %2}
2780 movlps\t{%H2, %0|%0, %H2}
2781 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
2782 %vmovhps\t{%2, %0|%0, %2}"
2783 [(set_attr "isa" "noavx,avx,noavx,avx,base")
2784 (set_attr "type" "ssemov")
2785 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
2786 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
2787
;; Expander wrapper around the sse_movlhps insn.  The operands are first
;; passed through ix86_fixup_binary_operands, which returns the destination
;; to use (presumably legitimizing operand combinations the insn cannot
;; accept -- confirm in i386.c).  sse_movlhps is emitted on that
;; destination, and if it differs from operand 0 the result is copied to
;; operand 0 afterwards.
2788 (define_expand "sse_movlhps_exp"
2789 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2790 (vec_select:V4SF
2791 (vec_concat:V8SF
2792 (match_operand:V4SF 1 "nonimmediate_operand" "")
2793 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2794 (parallel [(const_int 0)
2795 (const_int 1)
2796 (const_int 4)
2797 (const_int 5)])))]
2798 "TARGET_SSE"
2799 {
2800 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
2801
2802 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
2803
2804 /* Fix up the destination if needed. */
2805 if (dst != operands[0])
2806 emit_move_insn (operands[0], dst);
2807
2808 DONE;
2809 })
2810
;; Result is { op1[0], op1[1], op2[0], op2[1] }.
;; Alternatives, in order:
;;   0/1  register operands: movlhps (dest tied to op1) / vmovlhps
;;   2/3  operand 2 in memory: movhps / vmovhps load into the upper half
;;   4    destination in offsettable memory, tied to operand 1: movlps
;;        store of op2 to %H0
;; Alternative 3 prints vmovhps, whose source must be a memory operand, so
;; operand 2 is constrained to "m" there, mirroring alternative 2.  With "x"
;; the alternative duplicated alternative 1 and left AVX without any
;; alternative accepting a memory operand 2.
2811 (define_insn "sse_movlhps"
2812 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
2813 (vec_select:V4SF
2814 (vec_concat:V8SF
2815 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
2816 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
2817 (parallel [(const_int 0)
2818 (const_int 1)
2819 (const_int 4)
2820 (const_int 5)])))]
2821 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2822 "@
2823 movlhps\t{%2, %0|%0, %2}
2824 vmovlhps\t{%2, %1, %0|%0, %1, %2}
2825 movhps\t{%2, %0|%0, %2}
2826 vmovhps\t{%2, %1, %0|%0, %1, %2}
2827 %vmovlps\t{%2, %H0|%H0, %2}"
2828 [(set_attr "isa" "noavx,avx,noavx,avx,base")
2829 (set_attr "type" "ssemov")
2830 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
2831 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
2832
2833 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhps.  Result is
;; { op1[2], op2[2], op1[3], op2[3], op1[6], op2[6], op1[7], op2[7] }
;; (indices 8..15 of the V16SF concatenation refer to operand 2).
2834 (define_insn "avx_unpckhps256"
2835 [(set (match_operand:V8SF 0 "register_operand" "=x")
2836 (vec_select:V8SF
2837 (vec_concat:V16SF
2838 (match_operand:V8SF 1 "register_operand" "x")
2839 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2840 (parallel [(const_int 2) (const_int 10)
2841 (const_int 3) (const_int 11)
2842 (const_int 6) (const_int 14)
2843 (const_int 7) (const_int 15)])))]
2844 "TARGET_AVX"
2845 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
2846 [(set_attr "type" "sselog")
2847 (set_attr "prefix" "vex")
2848 (set_attr "mode" "V8SF")])
2849
;; Interleave the upper four elements of operands 1 and 2.
;; Three sets are generated:
;;   operand 3 = in-lane low interleave  (same selection as avx_unpcklps256)
;;   operand 4 = in-lane high interleave (same selection as avx_unpckhps256)
;;   operand 0 = elements 4..7 of operand 3 followed by elements 4..7 of
;;               operand 4, i.e. the upper lane of each temporary.
;; Operands 3 and 4 are fresh V8SF pseudos created in the preparation code.
2850 (define_expand "vec_interleave_highv8sf"
2851 [(set (match_dup 3)
2852 (vec_select:V8SF
2853 (vec_concat:V16SF
2854 (match_operand:V8SF 1 "register_operand" "x")
2855 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2856 (parallel [(const_int 0) (const_int 8)
2857 (const_int 1) (const_int 9)
2858 (const_int 4) (const_int 12)
2859 (const_int 5) (const_int 13)])))
2860 (set (match_dup 4)
2861 (vec_select:V8SF
2862 (vec_concat:V16SF
2863 (match_dup 1)
2864 (match_dup 2))
2865 (parallel [(const_int 2) (const_int 10)
2866 (const_int 3) (const_int 11)
2867 (const_int 6) (const_int 14)
2868 (const_int 7) (const_int 15)])))
2869 (set (match_operand:V8SF 0 "register_operand" "")
2870 (vec_select:V8SF
2871 (vec_concat:V16SF
2872 (match_dup 3)
2873 (match_dup 4))
2874 (parallel [(const_int 4) (const_int 5)
2875 (const_int 6) (const_int 7)
2876 (const_int 12) (const_int 13)
2877 (const_int 14) (const_int 15)])))]
2878 "TARGET_AVX"
2879 {
2880 operands[3] = gen_reg_rtx (V8SFmode);
2881 operands[4] = gen_reg_rtx (V8SFmode);
2882 })
2883
;; Result is { op1[2], op2[2], op1[3], op2[3] }.
;; Alternative 0 is the two-operand unpckhps (destination tied to operand 1);
;; alternative 1 is the three-operand vunpckhps.
2884 (define_insn "vec_interleave_highv4sf"
2885 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2886 (vec_select:V4SF
2887 (vec_concat:V8SF
2888 (match_operand:V4SF 1 "register_operand" "0,x")
2889 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2890 (parallel [(const_int 2) (const_int 6)
2891 (const_int 3) (const_int 7)])))]
2892 "TARGET_SSE"
2893 "@
2894 unpckhps\t{%2, %0|%0, %2}
2895 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
2896 [(set_attr "isa" "noavx,avx")
2897 (set_attr "type" "sselog")
2898 (set_attr "prefix" "orig,vex")
2899 (set_attr "mode" "V4SF")])
2900
2901 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps.  Result is
;; { op1[0], op2[0], op1[1], op2[1], op1[4], op2[4], op1[5], op2[5] }
;; (indices 8..15 of the V16SF concatenation refer to operand 2).
2902 (define_insn "avx_unpcklps256"
2903 [(set (match_operand:V8SF 0 "register_operand" "=x")
2904 (vec_select:V8SF
2905 (vec_concat:V16SF
2906 (match_operand:V8SF 1 "register_operand" "x")
2907 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2908 (parallel [(const_int 0) (const_int 8)
2909 (const_int 1) (const_int 9)
2910 (const_int 4) (const_int 12)
2911 (const_int 5) (const_int 13)])))]
2912 "TARGET_AVX"
2913 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
2914 [(set_attr "type" "sselog")
2915 (set_attr "prefix" "vex")
2916 (set_attr "mode" "V8SF")])
2917
;; Interleave the lower four elements of operands 1 and 2.
;; Three sets are generated:
;;   operand 3 = in-lane low interleave  (same selection as avx_unpcklps256)
;;   operand 4 = in-lane high interleave (same selection as avx_unpckhps256)
;;   operand 0 = elements 0..3 of operand 3 followed by elements 0..3 of
;;               operand 4, i.e. the lower lane of each temporary.
;; Operands 3 and 4 are fresh V8SF pseudos created in the preparation code.
2918 (define_expand "vec_interleave_lowv8sf"
2919 [(set (match_dup 3)
2920 (vec_select:V8SF
2921 (vec_concat:V16SF
2922 (match_operand:V8SF 1 "register_operand" "x")
2923 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2924 (parallel [(const_int 0) (const_int 8)
2925 (const_int 1) (const_int 9)
2926 (const_int 4) (const_int 12)
2927 (const_int 5) (const_int 13)])))
2928 (set (match_dup 4)
2929 (vec_select:V8SF
2930 (vec_concat:V16SF
2931 (match_dup 1)
2932 (match_dup 2))
2933 (parallel [(const_int 2) (const_int 10)
2934 (const_int 3) (const_int 11)
2935 (const_int 6) (const_int 14)
2936 (const_int 7) (const_int 15)])))
2937 (set (match_operand:V8SF 0 "register_operand" "")
2938 (vec_select:V8SF
2939 (vec_concat:V16SF
2940 (match_dup 3)
2941 (match_dup 4))
2942 (parallel [(const_int 0) (const_int 1)
2943 (const_int 2) (const_int 3)
2944 (const_int 8) (const_int 9)
2945 (const_int 10) (const_int 11)])))]
2946 "TARGET_AVX"
2947 {
2948 operands[3] = gen_reg_rtx (V8SFmode);
2949 operands[4] = gen_reg_rtx (V8SFmode);
2950 })
2951
;; Result is { op1[0], op2[0], op1[1], op2[1] }.
;; Alternative 0 is the two-operand unpcklps (destination tied to operand 1);
;; alternative 1 is the three-operand vunpcklps.
2952 (define_insn "vec_interleave_lowv4sf"
2953 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2954 (vec_select:V4SF
2955 (vec_concat:V8SF
2956 (match_operand:V4SF 1 "register_operand" "0,x")
2957 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2958 (parallel [(const_int 0) (const_int 4)
2959 (const_int 1) (const_int 5)])))]
2960 "TARGET_SSE"
2961 "@
2962 unpcklps\t{%2, %0|%0, %2}
2963 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
2964 [(set_attr "isa" "noavx,avx")
2965 (set_attr "type" "sselog")
2966 (set_attr "prefix" "orig,vex")
2967 (set_attr "mode" "V4SF")])
2968
2969 ;; These are modeled with the same vec_concat as the others so that we
2970 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: duplicates the odd-indexed elements of operand 1,
;; giving { op1[1], op1[1], op1[3], op1[3], op1[5], op1[5], op1[7], op1[7] }.
;; Operand 1 is concatenated with itself (match_dup 1).
2971 (define_insn "avx_movshdup256"
2972 [(set (match_operand:V8SF 0 "register_operand" "=x")
2973 (vec_select:V8SF
2974 (vec_concat:V16SF
2975 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2976 (match_dup 1))
2977 (parallel [(const_int 1) (const_int 1)
2978 (const_int 3) (const_int 3)
2979 (const_int 5) (const_int 5)
2980 (const_int 7) (const_int 7)])))]
2981 "TARGET_AVX"
2982 "vmovshdup\t{%1, %0|%0, %1}"
2983 [(set_attr "type" "sse")
2984 (set_attr "prefix" "vex")
2985 (set_attr "mode" "V8SF")])
2986
;; 128-bit movshdup: result is { op1[1], op1[1], op1[3], op1[3] }.
;; Operand 1 is concatenated with itself, so index 7 denotes element 3 of the
;; second copy, i.e. op1[3]; the selection has the same shape as a shufps
;; selection (two indices below 4, two at or above 4).
2987 (define_insn "sse3_movshdup"
2988 [(set (match_operand:V4SF 0 "register_operand" "=x")
2989 (vec_select:V4SF
2990 (vec_concat:V8SF
2991 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2992 (match_dup 1))
2993 (parallel [(const_int 1)
2994 (const_int 1)
2995 (const_int 7)
2996 (const_int 7)])))]
2997 "TARGET_SSE3"
2998 "%vmovshdup\t{%1, %0|%0, %1}"
2999 [(set_attr "type" "sse")
3000 (set_attr "prefix_rep" "1")
3001 (set_attr "prefix" "maybe_vex")
3002 (set_attr "mode" "V4SF")])
3003
;; 256-bit vmovsldup: duplicates the even-indexed elements of operand 1,
;; giving { op1[0], op1[0], op1[2], op1[2], op1[4], op1[4], op1[6], op1[6] }.
;; Operand 1 is concatenated with itself (match_dup 1).
3004 (define_insn "avx_movsldup256"
3005 [(set (match_operand:V8SF 0 "register_operand" "=x")
3006 (vec_select:V8SF
3007 (vec_concat:V16SF
3008 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3009 (match_dup 1))
3010 (parallel [(const_int 0) (const_int 0)
3011 (const_int 2) (const_int 2)
3012 (const_int 4) (const_int 4)
3013 (const_int 6) (const_int 6)])))]
3014 "TARGET_AVX"
3015 "vmovsldup\t{%1, %0|%0, %1}"
3016 [(set_attr "type" "sse")
3017 (set_attr "prefix" "vex")
3018 (set_attr "mode" "V8SF")])
3019
;; 128-bit movsldup: result is { op1[0], op1[0], op1[2], op1[2] }.
;; Operand 1 is concatenated with itself, so index 6 denotes element 2 of the
;; second copy, i.e. op1[2]; the selection has the same shape as a shufps
;; selection (two indices below 4, two at or above 4).
3020 (define_insn "sse3_movsldup"
3021 [(set (match_operand:V4SF 0 "register_operand" "=x")
3022 (vec_select:V4SF
3023 (vec_concat:V8SF
3024 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3025 (match_dup 1))
3026 (parallel [(const_int 0)
3027 (const_int 0)
3028 (const_int 6)
3029 (const_int 6)])))]
3030 "TARGET_SSE3"
3031 "%vmovsldup\t{%1, %0|%0, %1}"
3032 [(set_attr "type" "sse")
3033 (set_attr "prefix_rep" "1")
3034 (set_attr "prefix" "maybe_vex")
3035 (set_attr "mode" "V4SF")])
3036
;; Expands an immediate shuffle mask (operand 3) into the eight explicit
;; element indices taken by avx_shufps256_1.  The four 2-bit fields of the
;; mask are used twice: once for the low lane (operand 1 indices 0..3,
;; operand 2 indices 8..11) and once more, offset by 4, for the high lane
;; (operand 1 indices 4..7, operand 2 indices 12..15).
3037 (define_expand "avx_shufps256"
3038 [(match_operand:V8SF 0 "register_operand" "")
3039 (match_operand:V8SF 1 "register_operand" "")
3040 (match_operand:V8SF 2 "nonimmediate_operand" "")
3041 (match_operand:SI 3 "const_int_operand" "")]
3042 "TARGET_AVX"
3043 {
3044 int mask = INTVAL (operands[3]);
3045 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3046 GEN_INT ((mask >> 0) & 3),
3047 GEN_INT ((mask >> 2) & 3),
3048 GEN_INT (((mask >> 4) & 3) + 8),
3049 GEN_INT (((mask >> 6) & 3) + 8),
3050 GEN_INT (((mask >> 0) & 3) + 4),
3051 GEN_INT (((mask >> 2) & 3) + 4),
3052 GEN_INT (((mask >> 4) & 3) + 12),
3053 GEN_INT (((mask >> 6) & 3) + 12)));
3054 DONE;
3055 })
3056
3057 ;; Each 2-bit field of the mask selects one element in each 128-bit lane.
;; vshufps on 256-bit vectors with the selection spelled out as eight element
;; indices (operands 3..10).  The insn condition only accepts selections in
;; which the high-lane indices (operands 7..10) equal the low-lane indices
;; (operands 3..6) plus 4.  The output code rebuilds the 8-bit immediate from
;; operands 3..6 and stores it back into operands[3] for printing.
3058 (define_insn "avx_shufps256_1"
3059 [(set (match_operand:V8SF 0 "register_operand" "=x")
3060 (vec_select:V8SF
3061 (vec_concat:V16SF
3062 (match_operand:V8SF 1 "register_operand" "x")
3063 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3064 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3065 (match_operand 4 "const_0_to_3_operand" "")
3066 (match_operand 5 "const_8_to_11_operand" "")
3067 (match_operand 6 "const_8_to_11_operand" "")
3068 (match_operand 7 "const_4_to_7_operand" "")
3069 (match_operand 8 "const_4_to_7_operand" "")
3070 (match_operand 9 "const_12_to_15_operand" "")
3071 (match_operand 10 "const_12_to_15_operand" "")])))]
3072 "TARGET_AVX
3073 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3074 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3075 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3076 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3077 {
3078 int mask;
3079 mask = INTVAL (operands[3]);
3080 mask |= INTVAL (operands[4]) << 2;
3081 mask |= (INTVAL (operands[5]) - 8) << 4;
3082 mask |= (INTVAL (operands[6]) - 8) << 6;
3083 operands[3] = GEN_INT (mask);
3084
3085 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3086 }
3087 [(set_attr "type" "sselog")
3088 (set_attr "length_immediate" "1")
3089 (set_attr "prefix" "vex")
3090 (set_attr "mode" "V8SF")])
3091
;; Expands an immediate shuffle mask (operand 3) into four explicit element
;; indices and emits sse_shufps_v4sf.  Fields 0 and 1 index operand 1 (0..3);
;; fields 2 and 3 index operand 2 and are therefore offset by 4.
3092 (define_expand "sse_shufps"
3093 [(match_operand:V4SF 0 "register_operand" "")
3094 (match_operand:V4SF 1 "register_operand" "")
3095 (match_operand:V4SF 2 "nonimmediate_operand" "")
3096 (match_operand:SI 3 "const_int_operand" "")]
3097 "TARGET_SSE"
3098 {
3099 int mask = INTVAL (operands[3]);
3100 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3101 GEN_INT ((mask >> 0) & 3),
3102 GEN_INT ((mask >> 2) & 3),
3103 GEN_INT (((mask >> 4) & 3) + 4),
3104 GEN_INT (((mask >> 6) & 3) + 4)));
3105 DONE;
3106 })
3107
;; shufps for every mode in VI4F_128, with the selection given as four element
;; indices (operands 3..6).  The output code packs them back into the 8-bit
;; immediate (operands 5 and 6 are rebased by 4), stores it in operands[3],
;; and prints shufps (alternative 0, destination tied to operand 1) or
;; vshufps (alternative 1).
3108 (define_insn "sse_shufps_<mode>"
3109 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3110 (vec_select:VI4F_128
3111 (vec_concat:<ssedoublevecmode>
3112 (match_operand:VI4F_128 1 "register_operand" "0,x")
3113 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3114 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3115 (match_operand 4 "const_0_to_3_operand" "")
3116 (match_operand 5 "const_4_to_7_operand" "")
3117 (match_operand 6 "const_4_to_7_operand" "")])))]
3118 "TARGET_SSE"
3119 {
3120 int mask = 0;
3121 mask |= INTVAL (operands[3]) << 0;
3122 mask |= INTVAL (operands[4]) << 2;
3123 mask |= (INTVAL (operands[5]) - 4) << 4;
3124 mask |= (INTVAL (operands[6]) - 4) << 6;
3125 operands[3] = GEN_INT (mask);
3126
3127 switch (which_alternative)
3128 {
3129 case 0:
3130 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3131 case 1:
3132 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3133 default:
3134 gcc_unreachable ();
3135 }
3136 }
3137 [(set_attr "isa" "noavx,avx")
3138 (set_attr "type" "sselog")
3139 (set_attr "length_immediate" "1")
3140 (set_attr "prefix" "orig,vex")
3141 (set_attr "mode" "V4SF")])
3142
;; Extracts elements 2 and 3 of V4SF operand 1 as a V2SF value.
;; Alternatives, in order: movhps store to memory; register-to-register
;; movhlps; movlps load from %H1 of an offsettable memory operand
;; (presumably its upper 8 bytes).
3143 (define_insn "sse_storehps"
3144 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3145 (vec_select:V2SF
3146 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3147 (parallel [(const_int 2) (const_int 3)])))]
3148 "TARGET_SSE"
3149 "@
3150 %vmovhps\t{%1, %0|%0, %1}
3151 %vmovhlps\t{%1, %d0|%d0, %1}
3152 %vmovlps\t{%H1, %d0|%d0, %H1}"
3153 [(set_attr "type" "ssemov")
3154 (set_attr "prefix" "maybe_vex")
3155 (set_attr "mode" "V2SF,V4SF,V2SF")])
3156
;; Expander for replacing the upper half of a V4SF: the result keeps elements
;; 0 and 1 of operand 1 and takes V2SF operand 2 as its upper half.  Operands
;; go through ix86_fixup_binary_operands, gen_sse_loadhps emits the insn, and
;; the result is copied to operand 0 if the fixup returned a different
;; destination.
3157 (define_expand "sse_loadhps_exp"
3158 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3159 (vec_concat:V4SF
3160 (vec_select:V2SF
3161 (match_operand:V4SF 1 "nonimmediate_operand" "")
3162 (parallel [(const_int 0) (const_int 1)]))
3163 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3164 "TARGET_SSE"
3165 {
3166 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3167
3168 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3169
3170 /* Fix up the destination if needed. */
3171 if (dst != operands[0])
3172 emit_move_insn (operands[0], dst);
3173
3174 DONE;
3175 })
3176
;; Result is { op1[0], op1[1], op2[0], op2[1] } with operand 2 in V2SF mode.
;; Alternatives, in order:
;;   0/1  operand 2 in memory: movhps / vmovhps
;;   2/3  operand 2 in a register: movlhps / vmovlhps
;;   4    destination in offsettable memory, tied to operand 1: movlps store
;;        of operand 2 to %H0
3177 (define_insn "sse_loadhps"
3178 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3179 (vec_concat:V4SF
3180 (vec_select:V2SF
3181 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3182 (parallel [(const_int 0) (const_int 1)]))
3183 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3184 "TARGET_SSE"
3185 "@
3186 movhps\t{%2, %0|%0, %2}
3187 vmovhps\t{%2, %1, %0|%0, %1, %2}
3188 movlhps\t{%2, %0|%0, %2}
3189 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3190 %vmovlps\t{%2, %H0|%H0, %2}"
3191 [(set_attr "isa" "noavx,avx,noavx,avx,base")
3192 (set_attr "type" "ssemov")
3193 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3194 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
3195
;; Extracts elements 0 and 1 of V4SF operand 1 as a V2SF value.
;; Alternatives, in order: movlps store to memory; register-to-register copy
;; with movaps; movlps load from memory.
3196 (define_insn "sse_storelps"
3197 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3198 (vec_select:V2SF
3199 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3200 (parallel [(const_int 0) (const_int 1)])))]
3201 "TARGET_SSE"
3202 "@
3203 %vmovlps\t{%1, %0|%0, %1}
3204 %vmovaps\t{%1, %0|%0, %1}
3205 %vmovlps\t{%1, %d0|%d0, %1}"
3206 [(set_attr "type" "ssemov")
3207 (set_attr "prefix" "maybe_vex")
3208 (set_attr "mode" "V2SF,V4SF,V2SF")])
3209
;; Expander for replacing the lower half of a V4SF: the result takes V2SF
;; operand 2 as its lower half and keeps elements 2 and 3 of operand 1.
;; Operands go through ix86_fixup_binary_operands, gen_sse_loadlps emits the
;; insn, and the result is copied to operand 0 if the fixup returned a
;; different destination.
3210 (define_expand "sse_loadlps_exp"
3211 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3212 (vec_concat:V4SF
3213 (match_operand:V2SF 2 "nonimmediate_operand" "")
3214 (vec_select:V2SF
3215 (match_operand:V4SF 1 "nonimmediate_operand" "")
3216 (parallel [(const_int 2) (const_int 3)]))))]
3217 "TARGET_SSE"
3218 {
3219 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3220
3221 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3222
3223 /* Fix up the destination if needed. */
3224 if (dst != operands[0])
3225 emit_move_insn (operands[0], dst);
3226
3227 DONE;
3228 })
3229
;; Result is { op2[0], op2[1], op1[2], op1[3] } with operand 2 in V2SF mode.
;; Alternatives, in order:
;;   0/1  both inputs in registers: shufps / vshufps with immediate 0xe4
;;        (low two elements from the first source, high two from the second)
;;   2/3  operand 2 in memory: movlps (dest tied to op1) / vmovlps
;;   4    destination in memory, tied to operand 1: movlps store of op2
;; Alternative 3 prints the three-operand vmovlps, whose second source must
;; be a memory operand, so operand 2 is constrained to "m" there, mirroring
;; alternative 2.  With "x" the alternative duplicated alternative 1 and left
;; AVX without any alternative accepting a memory operand 2.
3230 (define_insn "sse_loadlps"
3231 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3232 (vec_concat:V4SF
3233 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
3234 (vec_select:V2SF
3235 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3236 (parallel [(const_int 2) (const_int 3)]))))]
3237 "TARGET_SSE"
3238 "@
3239 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3240 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3241 movlps\t{%2, %0|%0, %2}
3242 vmovlps\t{%2, %1, %0|%0, %1, %2}
3243 %vmovlps\t{%2, %0|%0, %2}"
3244 [(set_attr "isa" "noavx,avx,noavx,avx,base")
3245 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3246 (set_attr "length_immediate" "1,1,*,*,*")
3247 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3248 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3249
;; movss between registers: a vec_merge with mask 1, i.e. element 0 comes from
;; operand 2 and the remaining elements from operand 1.  Alternative 0 ties
;; the destination to operand 1 (movss); alternative 1 is the three-operand
;; vmovss.
3250 (define_insn "sse_movss"
3251 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3252 (vec_merge:V4SF
3253 (match_operand:V4SF 2 "register_operand" " x,x")
3254 (match_operand:V4SF 1 "register_operand" " 0,x")
3255 (const_int 1)))]
3256 "TARGET_SSE"
3257 "@
3258 movss\t{%2, %0|%0, %2}
3259 vmovss\t{%2, %1, %0|%0, %1, %2}"
3260 [(set_attr "isa" "noavx,avx")
3261 (set_attr "type" "ssemov")
3262 (set_attr "prefix" "orig,vex")
3263 (set_attr "mode" "SF")])
3264
;; Broadcast an SF scalar (operand 1) into all four elements of a V4SF.
;; Without AVX the scalar is forced into a register before the pattern is
;; emitted, since the non-AVX *vec_dupv4sf insn only accepts a register
;; source.  Operand 1 is an SFmode value, so the register must be requested
;; in SFmode; asking for V4SFmode would produce a mode-mismatched move
;; whenever operand 1 is a memory reference.
3265 (define_expand "vec_dupv4sf"
3266 [(set (match_operand:V4SF 0 "register_operand" "")
3267 (vec_duplicate:V4SF
3268 (match_operand:SF 1 "nonimmediate_operand" "")))]
3269 "TARGET_SSE"
3270 {
3271 if (!TARGET_AVX)
3272 operands[1] = force_reg (SFmode, operands[1]);
3273 })
3274
;; AVX broadcast of an SF scalar: vshufps with immediate 0 when the source is
;; in a register, vbroadcastss when it is in memory.
3275 (define_insn "*vec_dupv4sf_avx"
3276 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3277 (vec_duplicate:V4SF
3278 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3279 "TARGET_AVX"
3280 "@
3281 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3282 vbroadcastss\t{%1, %0|%0, %1}"
3283 [(set_attr "type" "sselog1,ssemov")
3284 (set_attr "length_immediate" "1,0")
3285 (set_attr "prefix_extra" "0,1")
3286 (set_attr "prefix" "vex")
3287 (set_attr "mode" "V4SF")])
3288
;; SSE broadcast of an SF scalar held in a register: the source is tied to
;; the destination and shufps with immediate 0 replicates element 0.
3289 (define_insn "*vec_dupv4sf"
3290 [(set (match_operand:V4SF 0 "register_operand" "=x")
3291 (vec_duplicate:V4SF
3292 (match_operand:SF 1 "register_operand" "0")))]
3293 "TARGET_SSE"
3294 "shufps\t{$0, %0, %0|%0, %0, 0}"
3295 [(set_attr "type" "sselog1")
3296 (set_attr "length_immediate" "1")
3297 (set_attr "mode" "V4SF")])
3298
3299 ;; Although insertps takes register source, we prefer
3300 ;; unpcklps with register source since it is shorter.
;; Builds a V2SF from two SF scalars when SSE4.1 is available.
;; Alternatives, in order:
;;   0/1  both in SSE registers: unpcklps / vunpcklps
;;   2/3  operand 2 in memory: insertps / vinsertps with immediate 0x10
;;   4    operand 1 in memory, operand 2 matching constraint C: movss load
;;   5    MMX destination tied to operand 1: punpckldq
;;   6    MMX destination, operand 1 in memory, operand 2 matching C: movd
;; (C presumably denotes the zero constant, as in reg_or_0_operand patterns.)
3301 (define_insn "*vec_concatv2sf_sse4_1"
3302 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3303 (vec_concat:V2SF
3304 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3305 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3306 "TARGET_SSE4_1"
3307 "@
3308 unpcklps\t{%2, %0|%0, %2}
3309 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3310 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3311 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3312 %vmovss\t{%1, %0|%0, %1}
3313 punpckldq\t{%2, %0|%0, %2}
3314 movd\t{%1, %0|%0, %1}"
3315 [(set_attr "isa" "noavx,avx,noavx,avx,base,base,base")
3316 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3317 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3318 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3319 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3320 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3321 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3322
3323 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3324 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3325 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF from two SF scalars with plain SSE.
;; Alternatives, in order: unpcklps (SSE registers, dest tied to operand 1);
;; movss load of operand 1 when operand 2 matches C; punpckldq on MMX
;; registers; movd load into an MMX register when operand 2 matches C.
3326 (define_insn "*vec_concatv2sf_sse"
3327 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3328 (vec_concat:V2SF
3329 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3330 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3331 "TARGET_SSE"
3332 "@
3333 unpcklps\t{%2, %0|%0, %2}
3334 movss\t{%1, %0|%0, %1}
3335 punpckldq\t{%2, %0|%0, %2}
3336 movd\t{%1, %0|%0, %1}"
3337 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3338 (set_attr "mode" "V4SF,SF,DI,DI")])
3339
;; Builds a V4SF from two V2SF halves (operand 1 low, operand 2 high).
;; Alternatives, in order: movlhps / vmovlhps when operand 2 is a register,
;; movhps / vmovhps when operand 2 is in memory.
3340 (define_insn "*vec_concatv4sf_sse"
3341 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3342 (vec_concat:V4SF
3343 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3344 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3345 "TARGET_SSE"
3346 "@
3347 movlhps\t{%2, %0|%0, %2}
3348 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3349 movhps\t{%2, %0|%0, %2}
3350 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3351 [(set_attr "isa" "noavx,avx,noavx,avx")
3352 (set_attr "type" "ssemov")
3353 (set_attr "prefix" "orig,vex,orig,vex")
3354 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
3355
;; vec_init expander for every mode in V_128: hands operand 0 (destination)
;; and operand 1 to ix86_expand_vector_init, passing false as its first
;; argument, and emits nothing else.
3356 (define_expand "vec_init<mode>"
3357 [(match_operand:V_128 0 "register_operand" "")
3358 (match_operand 1 "" "")]
3359 "TARGET_SSE"
3360 {
3361 ix86_expand_vector_init (false, operands[0], operands[1]);
3362 DONE;
3363 })
3364
3365 ;; Avoid combining registers from different units in a single alternative,
3366 ;; see comment above inline_secondary_memory_needed function in i386.c
;; SSE4.1 variant of "set element 0": element 0 of the result is scalar
;; operand 2 and the remaining elements come from operand 1 (vec_merge with
;; mask 1).  Operand 1 is either a constant matching C or a vector register.
;; The last three alternatives have a memory destination and output "#";
;; they rely on being split (see the memory vec_merge define_split in this
;; file).
3367 (define_insn "*vec_set<mode>_0_sse4_1"
3368 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3369 "=x,x,x ,x,x,x ,x ,m,m,m")
3370 (vec_merge:VI4F_128
3371 (vec_duplicate:VI4F_128
3372 (match_operand:<ssescalarmode> 2 "general_operand"
3373 " x,m,*r,x,x,*rm,*rm,x,*r,fF"))
3374 (match_operand:VI4F_128 1 "vector_move_operand"
3375 " C,C,C ,0,x,0 ,x ,0,0 ,0")
3376 (const_int 1)))]
3377 "TARGET_SSE4_1"
3378 "@
3379 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3380 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3381 %vmovd\t{%2, %0|%0, %2}
3382 movss\t{%2, %0|%0, %2}
3383 vmovss\t{%2, %1, %0|%0, %1, %2}
3384 pinsrd\t{$0, %2, %0|%0, %2, 0}
3385 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3386 #
3387 #
3388 #"
3389 [(set_attr "isa" "base,base,base,noavx,avx,noavx,avx,base,base,base")
3390 (set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov,sselog,sselog,*,*,*")
3391 (set_attr "prefix_extra" "*,*,*,*,*,1,1,*,*,*")
3392 (set_attr "length_immediate" "*,*,*,*,*,1,1,*,*,*")
3393 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,vex,orig,vex,*,*,*")
3394 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,TI,TI,*,*,*")])
3395
3396 ;; Avoid combining registers from different units in a single alternative,
3397 ;; see comment above inline_secondary_memory_needed function in i386.c
;; SSE2 variant of "set element 0": element 0 of the result is scalar
;; operand 2, the remaining elements come from operand 1 (a constant matching
;; C, or tied to the destination).  The three memory-destination
;; alternatives output "#" and rely on being split.
3398 (define_insn "*vec_set<mode>_0_sse2"
3399 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3400 "=x,x ,x,m,m ,m")
3401 (vec_merge:VI4F_128
3402 (vec_duplicate:VI4F_128
3403 (match_operand:<ssescalarmode> 2 "general_operand"
3404 " m,*r,x,x,*r,fF"))
3405 (match_operand:VI4F_128 1 "vector_move_operand"
3406 " C, C,0,0,0 ,0")
3407 (const_int 1)))]
3408 "TARGET_SSE2"
3409 "@
3410 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3411 movd\t{%2, %0|%0, %2}
3412 movss\t{%2, %0|%0, %2}
3413 #
3414 #
3415 #"
3416 [(set_attr "type" "ssemov")
3417 (set_attr "mode" "<ssescalarmode>,SI,SF,*,*,*")])
3418
3419 ;; Avoid combining registers from different units in a single alternative,
3420 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Plain-SSE variant of "set element 0": element 0 of the result is scalar
;; operand 2, the remaining elements come from operand 1 (a constant matching
;; C, or tied to the destination).  Register destinations use movss; the
;; three memory-destination alternatives output "#" and rely on being split.
3421 (define_insn "vec_set<mode>_0"
3422 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3423 "=x,x,m,m ,m")
3424 (vec_merge:VI4F_128
3425 (vec_duplicate:VI4F_128
3426 (match_operand:<ssescalarmode> 2 "general_operand"
3427 " m,x,x,*r,fF"))
3428 (match_operand:VI4F_128 1 "vector_move_operand"
3429 " C,0,0,0 ,0")
3430 (const_int 1)))]
3431 "TARGET_SSE"
3432 "@
3433 movss\t{%2, %0|%0, %2}
3434 movss\t{%2, %0|%0, %2}
3435 #
3436 #
3437 #"
3438 [(set_attr "type" "ssemov")
3439 (set_attr "mode" "SF,SF,*,*,*")])
3440
3441 ;; A subset is vec_setv4sf.
;; Inserts SF operand 2 into one element of V4SF operand 1 using insertps.
;; Operand 3 is a one-hot merge mask (1, 2, 4 or 8); the output code converts
;; it to the insertps immediate by taking its log2 and shifting it left by 4,
;; then prints insertps (dest tied to operand 1) or vinsertps.
3442 (define_insn "*vec_setv4sf_sse4_1"
3443 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3444 (vec_merge:V4SF
3445 (vec_duplicate:V4SF
3446 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3447 (match_operand:V4SF 1 "register_operand" "0,x")
3448 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n,n")))]
3449 "TARGET_SSE4_1"
3450 {
3451 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3452 switch (which_alternative)
3453 {
3454 case 0:
3455 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3456 case 1:
3457 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3458 default:
3459 gcc_unreachable ();
3460 }
3461 }
3462 [(set_attr "isa" "noavx,avx")
3463 (set_attr "type" "sselog")
3464 (set_attr "prefix_data16" "1,*")
3465 (set_attr "prefix_extra" "1")
3466 (set_attr "length_immediate" "1")
3467 (set_attr "prefix" "orig,vex")
3468 (set_attr "mode" "V4SF")])
3469
;; insertps with a raw 8-bit immediate (operand 3), modelled as an unspec.
;; When operand 2 is in memory, the output code takes bits 7:6 of the
;; immediate as an element count, clears them from the immediate if nonzero,
;; and re-addresses operand 2 as the SFmode element at byte offset
;; 4 * count, so the instruction reads just that scalar.
3470 (define_insn "sse4_1_insertps"
3471 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3472 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3473 (match_operand:V4SF 1 "register_operand" "0,x")
3474 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3475 UNSPEC_INSERTPS))]
3476 "TARGET_SSE4_1"
3477 {
3478 if (MEM_P (operands[2]))
3479 {
3480 unsigned count_s = INTVAL (operands[3]) >> 6;
3481 if (count_s)
3482 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3483 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3484 }
3485 switch (which_alternative)
3486 {
3487 case 0:
3488 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3489 case 1:
3490 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3491 default:
3492 gcc_unreachable ();
3493 }
3494 }
3495 [(set_attr "isa" "noavx,avx")
3496 (set_attr "type" "sselog")
3497 (set_attr "prefix_data16" "1,*")
3498 (set_attr "prefix_extra" "1")
3499 (set_attr "length_immediate" "1")
3500 (set_attr "prefix" "orig,vex")
3501 (set_attr "mode" "V4SF")])
3502
;; After reload, setting element 0 of a vector that lives in memory (all
;; other elements unchanged, see match_dup 0) is rewritten as a plain scalar
;; move of operand 1 to offset 0 of that memory location.
3503 (define_split
3504 [(set (match_operand:VI4F_128 0 "memory_operand" "")
3505 (vec_merge:VI4F_128
3506 (vec_duplicate:VI4F_128
3507 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3508 (match_dup 0)
3509 (const_int 1)))]
3510 "TARGET_SSE && reload_completed"
3511 [(const_int 0)]
3512 {
3513 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
3514 operands[1]);
3515 DONE;
3516 })
3517
;; vec_set expander for every mode in V_128: hands the vector (operand 0),
;; the scalar (operand 1) and the constant element index (operand 2) to
;; ix86_expand_vector_set, passing false as its first argument.
3518 (define_expand "vec_set<mode>"
3519 [(match_operand:V_128 0 "register_operand" "")
3520 (match_operand:<ssescalarmode> 1 "register_operand" "")
3521 (match_operand 2 "const_int_operand" "")]
3522 "TARGET_SSE"
3523 {
3524 ix86_expand_vector_set (false, operands[0], operands[1],
3525 INTVAL (operands[2]));
3526 DONE;
3527 })
3528
;; Extract element 0 of a V4SF.  The insn itself outputs "#"; after reload it
;; is split into an ordinary SFmode move whose source is operand 1 viewed in
;; SFmode (same register number for a register, gen_lowpart otherwise).
;; The insn condition rejects memory-to-memory.
3529 (define_insn_and_split "*vec_extractv4sf_0"
3530 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3531 (vec_select:SF
3532 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3533 (parallel [(const_int 0)])))]
3534 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3535 "#"
3536 "&& reload_completed"
3537 [(const_int 0)]
3538 {
3539 rtx op1 = operands[1];
3540 if (REG_P (op1))
3541 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3542 else
3543 op1 = gen_lowpart (SFmode, op1);
3544 emit_move_insn (operands[0], op1);
3545 DONE;
3546 })
3547
;; Extract one 128-bit half of a 256-bit vector (any mode in V_256).
;; Operand 2 is 0 or 1 and selects gen_vec_extract_lo_<mode> or
;; gen_vec_extract_hi_<mode> respectively; any other value is unreachable.
3548 (define_expand "avx_vextractf128<mode>"
3549 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
3550 (match_operand:V_256 1 "register_operand" "")
3551 (match_operand:SI 2 "const_0_to_1_operand" "")]
3552 "TARGET_AVX"
3553 {
3554 rtx (*insn)(rtx, rtx);
3555
3556 switch (INTVAL (operands[2]))
3557 {
3558 case 0:
3559 insn = gen_vec_extract_lo_<mode>;
3560 break;
3561 case 1:
3562 insn = gen_vec_extract_hi_<mode>;
3563 break;
3564 default:
3565 gcc_unreachable ();
3566 }
3567
3568 emit_insn (insn (operands[0], operands[1]));
3569 DONE;
3570 })
3571
;; Low half (elements 0,1) of a four-element 256-bit vector (VI8F_256).
;; Outputs "#"; after reload it is split into a plain move from operand 1
;; viewed in the half-width mode (same register number, or gen_lowpart).
3572 (define_insn_and_split "vec_extract_lo_<mode>"
3573 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3574 (vec_select:<ssehalfvecmode>
3575 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
3576 (parallel [(const_int 0) (const_int 1)])))]
3577 "TARGET_AVX"
3578 "#"
3579 "&& reload_completed"
3580 [(const_int 0)]
3581 {
3582 rtx op1 = operands[1];
3583 if (REG_P (op1))
3584 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3585 else
3586 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3587 emit_move_insn (operands[0], op1);
3588 DONE;
3589 })
3590
;; High half (elements 2,3) of a four-element 256-bit vector (VI8F_256),
;; emitted as vextractf128 with immediate 1 into a register or memory.
3591 (define_insn "vec_extract_hi_<mode>"
3592 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3593 (vec_select:<ssehalfvecmode>
3594 (match_operand:VI8F_256 1 "register_operand" "x,x")
3595 (parallel [(const_int 2) (const_int 3)])))]
3596 "TARGET_AVX"
3597 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3598 [(set_attr "type" "sselog")
3599 (set_attr "prefix_extra" "1")
3600 (set_attr "length_immediate" "1")
3601 (set_attr "memory" "none,store")
3602 (set_attr "prefix" "vex")
3603 (set_attr "mode" "V8SF")])
3604
;; Low half (elements 0..3) of an eight-element 256-bit vector (VI4F_256).
;; Outputs "#"; after reload it is split into a plain move from operand 1
;; viewed in the half-width mode (same register number, or gen_lowpart).
3605 (define_insn_and_split "vec_extract_lo_<mode>"
3606 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3607 (vec_select:<ssehalfvecmode>
3608 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
3609 (parallel [(const_int 0) (const_int 1)
3610 (const_int 2) (const_int 3)])))]
3611 "TARGET_AVX"
3612 "#"
3613 "&& reload_completed"
3614 [(const_int 0)]
3615 {
3616 rtx op1 = operands[1];
3617 if (REG_P (op1))
3618 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3619 else
3620 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3621 emit_move_insn (operands[0], op1);
3622 DONE;
3623 })
3624
;; High half (elements 4..7) of an eight-element 256-bit vector (VI4F_256),
;; emitted as vextractf128 with immediate 1 into a register or memory.
3625 (define_insn "vec_extract_hi_<mode>"
3626 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3627 (vec_select:<ssehalfvecmode>
3628 (match_operand:VI4F_256 1 "register_operand" "x,x")
3629 (parallel [(const_int 4) (const_int 5)
3630 (const_int 6) (const_int 7)])))]
3631 "TARGET_AVX"
3632 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3633 [(set_attr "type" "sselog")
3634 (set_attr "prefix_extra" "1")
3635 (set_attr "length_immediate" "1")
3636 (set_attr "memory" "none,store")
3637 (set_attr "prefix" "vex")
3638 (set_attr "mode" "V8SF")])
3639
;; Low half (elements 0..7) of a V16HI as V8HI.  Outputs "#"; after reload it
;; is split into a plain move from operand 1 viewed in V8HImode (same
;; register number, or gen_lowpart).
3640 (define_insn_and_split "vec_extract_lo_v16hi"
3641 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3642 (vec_select:V8HI
3643 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
3644 (parallel [(const_int 0) (const_int 1)
3645 (const_int 2) (const_int 3)
3646 (const_int 4) (const_int 5)
3647 (const_int 6) (const_int 7)])))]
3648 "TARGET_AVX"
3649 "#"
3650 "&& reload_completed"
3651 [(const_int 0)]
3652 {
3653 rtx op1 = operands[1];
3654 if (REG_P (op1))
3655 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
3656 else
3657 op1 = gen_lowpart (V8HImode, op1);
3658 emit_move_insn (operands[0], op1);
3659 DONE;
3660 })
3661
;; High half (elements 8..15) of a V16HI as V8HI, emitted as vextractf128
;; with immediate 1 into a register or memory.
3662 (define_insn "vec_extract_hi_v16hi"
3663 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3664 (vec_select:V8HI
3665 (match_operand:V16HI 1 "register_operand" "x,x")
3666 (parallel [(const_int 8) (const_int 9)
3667 (const_int 10) (const_int 11)
3668 (const_int 12) (const_int 13)
3669 (const_int 14) (const_int 15)])))]
3670 "TARGET_AVX"
3671 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3672 [(set_attr "type" "sselog")
3673 (set_attr "prefix_extra" "1")
3674 (set_attr "length_immediate" "1")
3675 (set_attr "memory" "none,store")
3676 (set_attr "prefix" "vex")
3677 (set_attr "mode" "V8SF")])
3678
;; Low half (elements 0..15) of a V32QI as V16QI.  Outputs "#"; after reload
;; it is split into a plain move from operand 1 viewed in V16QImode (same
;; register number, or gen_lowpart).
3679 (define_insn_and_split "vec_extract_lo_v32qi"
3680 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3681 (vec_select:V16QI
3682 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
3683 (parallel [(const_int 0) (const_int 1)
3684 (const_int 2) (const_int 3)
3685 (const_int 4) (const_int 5)
3686 (const_int 6) (const_int 7)
3687 (const_int 8) (const_int 9)
3688 (const_int 10) (const_int 11)
3689 (const_int 12) (const_int 13)
3690 (const_int 14) (const_int 15)])))]
3691 "TARGET_AVX"
3692 "#"
3693 "&& reload_completed"
3694 [(const_int 0)]
3695 {
3696 rtx op1 = operands[1];
3697 if (REG_P (op1))
3698 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
3699 else
3700 op1 = gen_lowpart (V16QImode, op1);
3701 emit_move_insn (operands[0], op1);
3702 DONE;
3703 })
3704
;; High half (elements 16..31) of a V32QI as V16QI, emitted as vextractf128
;; with immediate 1 into a register or memory.
3705 (define_insn "vec_extract_hi_v32qi"
3706 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3707 (vec_select:V16QI
3708 (match_operand:V32QI 1 "register_operand" "x,x")
3709 (parallel [(const_int 16) (const_int 17)
3710 (const_int 18) (const_int 19)
3711 (const_int 20) (const_int 21)
3712 (const_int 22) (const_int 23)
3713 (const_int 24) (const_int 25)
3714 (const_int 26) (const_int 27)
3715 (const_int 28) (const_int 29)
3716 (const_int 30) (const_int 31)])))]
3717 "TARGET_AVX"
3718 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3719 [(set_attr "type" "sselog")
3720 (set_attr "prefix_extra" "1")
3721 (set_attr "length_immediate" "1")
3722 (set_attr "memory" "none,store")
3723 (set_attr "prefix" "vex")
3724 (set_attr "mode" "V8SF")])
3725
;; extractps: copies element number operand 2 (0..3) of V4SF register
;; operand 1 into a general register or memory destination.
3726 (define_insn "*sse4_1_extractps"
3727 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
3728 (vec_select:SF
3729 (match_operand:V4SF 1 "register_operand" "x")
3730 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
3731 "TARGET_SSE4_1"
3732 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
3733 [(set_attr "type" "sselog")
3734 (set_attr "prefix_data16" "1")
3735 (set_attr "prefix_extra" "1")
3736 (set_attr "length_immediate" "1")
3737 (set_attr "prefix" "maybe_vex")
3738 (set_attr "mode" "V4SF")])
3739
;; Extract element operand 2 (0..3) of a V4SF that lives in offsettable
;; memory.  Outputs "#"; after reload it is split into an SFmode load from
;; byte offset 4 * index of operand 1.  The insn condition is empty, so the
;; pattern is not gated on any target flag here.
3740 (define_insn_and_split "*vec_extract_v4sf_mem"
3741 [(set (match_operand:SF 0 "register_operand" "=x*rf")
3742 (vec_select:SF
3743 (match_operand:V4SF 1 "memory_operand" "o")
3744 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
3745 ""
3746 "#"
3747 "reload_completed"
3748 [(const_int 0)]
3749 {
3750 int i = INTVAL (operands[2]);
3751
3752 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
3753 DONE;
3754 })
3755
3756 ;; Modes handled by vec_extract patterns.
;; The 256-bit float modes V8SF and V4DF are enabled only under
;; TARGET_AVX; the 128-bit modes carry no per-mode condition here.
3757 (define_mode_iterator VEC_EXTRACT_MODE
3758 [V16QI V8HI V4SI V2DI
3759 (V8SF "TARGET_AVX") V4SF
3760 (V4DF "TARGET_AVX") V2DF])
3761
;; Named vec_extract expander for every mode in VEC_EXTRACT_MODE.
;; Operand 0 receives the scalar element of operand 1 whose index is the
;; constant operand 2.  All work is delegated to
;; ix86_expand_vector_extract.
3762 (define_expand "vec_extract<mode>"
3763 [(match_operand:<ssescalarmode> 0 "register_operand" "")
3764 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
3765 (match_operand 2 "const_int_operand" "")]
3766 "TARGET_SSE"
3767 {
/* NOTE(review): the leading `false' is presumably the mmx_ok flag of
   ix86_expand_vector_extract -- confirm against i386.c.  */
3768 ix86_expand_vector_extract (false, operands[0], operands[1],
3769 INTVAL (operands[2]));
3770 DONE;
3771 })
3772
3773 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3774 ;;
3775 ;; Parallel double-precision floating point element swizzling
3776 ;;
3777 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3778
3779 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd.  Selects elements 1, 5, 3, 7 of the V8DF
;; concatenation of operands 1 and 2, i.e. { op1[1], op2[1], op1[3],
;; op2[3] }: the odd element of each 128-bit lane of both inputs.
3780 (define_insn "avx_unpckhpd256"
3781 [(set (match_operand:V4DF 0 "register_operand" "=x")
3782 (vec_select:V4DF
3783 (vec_concat:V8DF
3784 (match_operand:V4DF 1 "register_operand" "x")
3785 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3786 (parallel [(const_int 1) (const_int 5)
3787 (const_int 3) (const_int 7)])))]
3788 "TARGET_AVX"
3789 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
3790 [(set_attr "type" "sselog")
3791 (set_attr "prefix" "vex")
3792 (set_attr "mode" "V4DF")])
3793
;; Full-width interleave of the high halves of two V4DF vectors.
;; Three sets are emitted:
;;   tmp3 = { a0, b0, a2, b2 }   (selection 0,4,2,6 of concat (a, b))
;;   tmp4 = { a1, b1, a3, b3 }   (selection 1,5,3,7 of concat (a, b))
;;   op0  = { a2, b2, a3, b3 }   (selection 2,3,6,7 of concat (tmp3, tmp4))
;; where a = operand 1 and b = operand 2.  Operands 3 and 4 are fresh
;; V4DF pseudos created in the preparation statements.
3794 (define_expand "vec_interleave_highv4df"
3795 [(set (match_dup 3)
3796 (vec_select:V4DF
3797 (vec_concat:V8DF
3798 (match_operand:V4DF 1 "register_operand" "x")
3799 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3800 (parallel [(const_int 0) (const_int 4)
3801 (const_int 2) (const_int 6)])))
3802 (set (match_dup 4)
3803 (vec_select:V4DF
3804 (vec_concat:V8DF
3805 (match_dup 1)
3806 (match_dup 2))
3807 (parallel [(const_int 1) (const_int 5)
3808 (const_int 3) (const_int 7)])))
3809 (set (match_operand:V4DF 0 "register_operand" "")
3810 (vec_select:V4DF
3811 (vec_concat:V8DF
3812 (match_dup 3)
3813 (match_dup 4))
3814 (parallel [(const_int 2) (const_int 3)
3815 (const_int 6) (const_int 7)])))]
3816 "TARGET_AVX"
3817 {
3818 operands[3] = gen_reg_rtx (V4DFmode);
3819 operands[4] = gen_reg_rtx (V4DFmode);
3820 })
3821
3822
;; Interleave the high elements of two V2DF vectors: the result is
;; { op1[1], op2[1] } (selection 1,3 of concat (op1, op2)).  If
;; ix86_vec_interleave_v2df_operator_ok rejects the operand combination,
;; operand 2 is forced into a register.
3823 (define_expand "vec_interleave_highv2df"
3824 [(set (match_operand:V2DF 0 "register_operand" "")
3825 (vec_select:V2DF
3826 (vec_concat:V4DF
3827 (match_operand:V2DF 1 "nonimmediate_operand" "")
3828 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3829 (parallel [(const_int 1)
3830 (const_int 3)])))]
3831 "TARGET_SSE2"
3832 {
3833 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
3834 operands[2] = force_reg (V2DFmode, operands[2]);
3835 })
3836
;; SSE3 (and AVX) implementation of { op1[1], op2[1] } for V2DF.
;; Alternatives, in order:
;;   0  unpckhpd   two-operand SSE form, op1 tied to the destination
;;   1  vunpckhpd  three-operand AVX form
;;   2  (v)movddup op2 tied to memory op1: duplicate the high double of
;;                 that memory (%H1 -- presumably the address of op1
;;                 plus 8 bytes; confirm in print_operand)
;;   3  movlpd     op2 tied to the destination: load the high double of
;;                 memory op1 into the low element
;;   4  vmovlpd    AVX form of alternative 3 with op2 in any SSE register
;;   5  (v)movhpd  destination is memory tied to op2: store the high
;;                 element of register op1 at the start of the memory
3837 (define_insn "*sse3_interleave_highv2df"
3838 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
3839 (vec_select:V2DF
3840 (vec_concat:V4DF
3841 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
3842 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
3843 (parallel [(const_int 1)
3844 (const_int 3)])))]
3845 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
3846 "@
3847 unpckhpd\t{%2, %0|%0, %2}
3848 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
3849 %vmovddup\t{%H1, %0|%0, %H1}
3850 movlpd\t{%H1, %0|%0, %H1}
3851 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
3852 %vmovhpd\t{%1, %0|%0, %1}"
3853 [(set_attr "isa" "noavx,avx,base,noavx,avx,base")
3854 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
3855 (set_attr "prefix_data16" "*,*,*,1,*,1")
3856 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
3857 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
3858
;; Plain SSE2 implementation of { op1[1], op2[1] } for V2DF (no movddup,
;; no VEX forms).  Alternatives:
;;   0  unpckhpd  register/register, op1 tied to the destination
;;   1  movlpd    op2 tied to the destination: load the high double of
;;                memory op1 (%H1) into the low element
;;   2  movhpd    destination is memory tied to op2: store the high
;;                element of register op1
3859 (define_insn "*sse2_interleave_highv2df"
3860 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3861 (vec_select:V2DF
3862 (vec_concat:V4DF
3863 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
3864 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
3865 (parallel [(const_int 1)
3866 (const_int 3)])))]
3867 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
3868 "@
3869 unpckhpd\t{%2, %0|%0, %2}
3870 movlpd\t{%H1, %0|%0, %H1}
3871 movhpd\t{%1, %0|%0, %1}"
3872 [(set_attr "type" "sselog,ssemov,ssemov")
3873 (set_attr "prefix_data16" "*,1,1")
3874 (set_attr "mode" "V2DF,V1DF,V1DF")])
3875
3876 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander for a 256-bit movddup: selects elements 0, 4, 2, 6 of the
;; concatenation of operand 1 with itself, i.e. { op1[0], op1[0], op1[2],
;; op1[2] }.  No preparation code; the RTL is emitted as written.
3877 (define_expand "avx_movddup256"
3878 [(set (match_operand:V4DF 0 "register_operand" "")
3879 (vec_select:V4DF
3880 (vec_concat:V8DF
3881 (match_operand:V4DF 1 "nonimmediate_operand" "")
3882 (match_dup 1))
3883 (parallel [(const_int 0) (const_int 4)
3884 (const_int 2) (const_int 6)])))]
3885 "TARGET_AVX")
3886
;; Expander for 256-bit unpcklpd: selects elements 0, 4, 2, 6 of
;; concat (op1, op2), i.e. { op1[0], op2[0], op1[2], op2[2] } -- the even
;; element of each 128-bit lane.  Operand 1 must be a register here.
3887 (define_expand "avx_unpcklpd256"
3888 [(set (match_operand:V4DF 0 "register_operand" "")
3889 (vec_select:V4DF
3890 (vec_concat:V8DF
3891 (match_operand:V4DF 1 "register_operand" "")
3892 (match_operand:V4DF 2 "nonimmediate_operand" ""))
3893 (parallel [(const_int 0) (const_int 4)
3894 (const_int 2) (const_int 6)])))]
3895 "TARGET_AVX")
3896
;; Matcher for the 0,4,2,6 selection of concat (op1, op2) on V4DF.
;;   0  vmovddup   both inputs are the same operand (op2 tied to op1),
;;                 which may then be a register or memory
;;   1  vunpcklpd  op1 in a register, op2 register or memory
;; The insn condition permits a memory operand 1 only when it is
;; rtx_equal_p to operand 2, i.e. only for the vmovddup case.
3897 (define_insn "*avx_unpcklpd256"
3898 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
3899 (vec_select:V4DF
3900 (vec_concat:V8DF
3901 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
3902 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
3903 (parallel [(const_int 0) (const_int 4)
3904 (const_int 2) (const_int 6)])))]
3905 "TARGET_AVX
3906 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
3907 "@
3908 vmovddup\t{%1, %0|%0, %1}
3909 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
3910 [(set_attr "type" "sselog")
3911 (set_attr "prefix" "vex")
3912 (set_attr "mode" "V4DF")])
3913
;; Full-width interleave of the low halves of two V4DF vectors.
;; Three sets are emitted:
;;   tmp3 = { a0, b0, a2, b2 }   (selection 0,4,2,6 of concat (a, b))
;;   tmp4 = { a1, b1, a3, b3 }   (selection 1,5,3,7 of concat (a, b))
;;   op0  = { a0, b0, a1, b1 }   (selection 0,1,4,5 of concat (tmp3, tmp4))
;; where a = operand 1 and b = operand 2.  Operands 3 and 4 are fresh
;; V4DF pseudos created in the preparation statements.
3914 (define_expand "vec_interleave_lowv4df"
3915 [(set (match_dup 3)
3916 (vec_select:V4DF
3917 (vec_concat:V8DF
3918 (match_operand:V4DF 1 "register_operand" "x")
3919 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3920 (parallel [(const_int 0) (const_int 4)
3921 (const_int 2) (const_int 6)])))
3922 (set (match_dup 4)
3923 (vec_select:V4DF
3924 (vec_concat:V8DF
3925 (match_dup 1)
3926 (match_dup 2))
3927 (parallel [(const_int 1) (const_int 5)
3928 (const_int 3) (const_int 7)])))
3929 (set (match_operand:V4DF 0 "register_operand" "")
3930 (vec_select:V4DF
3931 (vec_concat:V8DF
3932 (match_dup 3)
3933 (match_dup 4))
3934 (parallel [(const_int 0) (const_int 1)
3935 (const_int 4) (const_int 5)])))]
3936 "TARGET_AVX"
3937 {
3938 operands[3] = gen_reg_rtx (V4DFmode);
3939 operands[4] = gen_reg_rtx (V4DFmode);
3940 })
3941
;; Interleave the low elements of two V2DF vectors: the result is
;; { op1[0], op2[0] } (selection 0,2 of concat (op1, op2)).  If
;; ix86_vec_interleave_v2df_operator_ok rejects the operand combination,
;; operand 1 is forced into a register.
3942 (define_expand "vec_interleave_lowv2df"
3943 [(set (match_operand:V2DF 0 "register_operand" "")
3944 (vec_select:V2DF
3945 (vec_concat:V4DF
3946 (match_operand:V2DF 1 "nonimmediate_operand" "")
3947 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3948 (parallel [(const_int 0)
3949 (const_int 2)])))]
3950 "TARGET_SSE2"
3951 {
3952 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
3953 operands[1] = force_reg (V2DFmode, operands[1]);
3954 })
3955
;; SSE3 (and AVX) implementation of { op1[0], op2[0] } for V2DF.
;; Alternatives, in order:
;;   0  unpcklpd   two-operand SSE form, op1 tied to the destination
;;   1  vunpcklpd  three-operand AVX form
;;   2  (v)movddup op2 tied to memory op1: duplicate its low double
;;   3  movhpd     op1 tied to the destination: load the low double of
;;                 memory op2 into the high element
;;   4  vmovhpd    AVX form of alternative 3 with op1 in any SSE register
;;   5  (v)movlpd  destination is offsettable memory tied to op1: store
;;                 the low element of register op2 into the upper half of
;;                 the memory (%H0 -- presumably address plus 8 bytes;
;;                 confirm in print_operand)
3956 (define_insn "*sse3_interleave_lowv2df"
3957 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
3958 (vec_select:V2DF
3959 (vec_concat:V4DF
3960 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
3961 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
3962 (parallel [(const_int 0)
3963 (const_int 2)])))]
3964 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
3965 "@
3966 unpcklpd\t{%2, %0|%0, %2}
3967 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
3968 %vmovddup\t{%1, %0|%0, %1}
3969 movhpd\t{%2, %0|%0, %2}
3970 vmovhpd\t{%2, %1, %0|%0, %1, %2}
3971 %vmovlpd\t{%2, %H0|%H0, %2}"
3972 [(set_attr "isa" "noavx,avx,base,noavx,avx,base")
3973 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
3974 (set_attr "prefix_data16" "*,*,*,1,*,1")
3975 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
3976 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
3977
;; Plain SSE2 implementation of { op1[0], op2[0] } for V2DF; operand 1 is
;; tied to the destination in every alternative.
;;   0  unpcklpd  register/register
;;   1  movhpd    load the low double of memory op2 into the high element
;;   2  movlpd    destination is offsettable memory: store the low element
;;                of register op2 into its upper half (%H0)
3978 (define_insn "*sse2_interleave_lowv2df"
3979 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
3980 (vec_select:V2DF
3981 (vec_concat:V4DF
3982 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
3983 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
3984 (parallel [(const_int 0)
3985 (const_int 2)])))]
3986 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
3987 "@
3988 unpcklpd\t{%2, %0|%0, %2}
3989 movhpd\t{%2, %0|%0, %2}
3990 movlpd\t{%2, %H0|%H0, %2}"
3991 [(set_attr "type" "sselog,ssemov,ssemov")
3992 (set_attr "prefix_data16" "*,1,1")
3993 (set_attr "mode" "V2DF,V1DF,V1DF")])
3994
;; After reload, a store to memory of { r[0], r[0] } (the low element of
;; one V2DF register duplicated) is split into two scalar DFmode stores
;; of that low element, at byte offsets 0 and 8 of the destination.
3995 (define_split
3996 [(set (match_operand:V2DF 0 "memory_operand" "")
3997 (vec_select:V2DF
3998 (vec_concat:V4DF
3999 (match_operand:V2DF 1 "register_operand" "")
4000 (match_dup 1))
4001 (parallel [(const_int 0)
4002 (const_int 2)])))]
4003 "TARGET_SSE3 && reload_completed"
4004 [(const_int 0)]
4005 {
/* View the same hard register in DFmode to get at element 0.  */
4006 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4007 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4008 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
4009 DONE;
4010 })
4011
;; A register load of { m[i], m[i] } from a V2DF in memory (selection
;; i, i+2 of the vector concatenated with itself, i in 0..1) is rewritten
;; as a vec_duplicate of the single DF at byte offset i*8 of that memory.
;; The condition checks that operand 3 is exactly operand 2 plus 2.
4012 (define_split
4013 [(set (match_operand:V2DF 0 "register_operand" "")
4014 (vec_select:V2DF
4015 (vec_concat:V4DF
4016 (match_operand:V2DF 1 "memory_operand" "")
4017 (match_dup 1))
4018 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4019 (match_operand:SI 3 "const_int_operand" "")])))]
4020 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4021 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4022 {
4023 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
4024 })
4025
;; Expander taking a vshufpd-style immediate in operand 3.  The low four
;; mask bits are decoded into four explicit element selectors for
;; avx_shufpd256_1, indexing the V8DF concatenation of operands 1 and 2:
;;   bit 0 -> 0 or 1,  bit 1 -> 4 or 5,  bit 2 -> 2 or 3,  bit 3 -> 6 or 7.
4026 (define_expand "avx_shufpd256"
4027 [(match_operand:V4DF 0 "register_operand" "")
4028 (match_operand:V4DF 1 "register_operand" "")
4029 (match_operand:V4DF 2 "nonimmediate_operand" "")
4030 (match_operand:SI 3 "const_int_operand" "")]
4031 "TARGET_AVX"
4032 {
4033 int mask = INTVAL (operands[3]);
4034 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4035 GEN_INT (mask & 1),
4036 GEN_INT (mask & 2 ? 5 : 4),
4037 GEN_INT (mask & 4 ? 3 : 2),
4038 GEN_INT (mask & 8 ? 7 : 6)));
4039 DONE;
4040 })
4041
;; 256-bit vshufpd with the shuffle spelled out as element selectors.
;; Operands 3..6 are restricted to 0..1, 4..5, 2..3 and 6..7 respectively.
;; At output time they are folded back into the 4-bit immediate
;; (one bit per selector, low bit first), which overwrites operands[3]
;; for printing.
4042 (define_insn "avx_shufpd256_1"
4043 [(set (match_operand:V4DF 0 "register_operand" "=x")
4044 (vec_select:V4DF
4045 (vec_concat:V8DF
4046 (match_operand:V4DF 1 "register_operand" "x")
4047 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4048 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4049 (match_operand 4 "const_4_to_5_operand" "")
4050 (match_operand 5 "const_2_to_3_operand" "")
4051 (match_operand 6 "const_6_to_7_operand" "")])))]
4052 "TARGET_AVX"
4053 {
4054 int mask;
4055 mask = INTVAL (operands[3]);
4056 mask |= (INTVAL (operands[4]) - 4) << 1;
4057 mask |= (INTVAL (operands[5]) - 2) << 2;
4058 mask |= (INTVAL (operands[6]) - 6) << 3;
4059 operands[3] = GEN_INT (mask);
4060
4061 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4062 }
4063 [(set_attr "type" "sselog")
4064 (set_attr "length_immediate" "1")
4065 (set_attr "prefix" "vex")
4066 (set_attr "mode" "V4DF")])
4067
;; Expander taking a shufpd-style immediate in operand 3.  The low two
;; mask bits are decoded into two element selectors for
;; sse2_shufpd_v2df, indexing the V4DF concatenation of operands 1 and 2:
;;   bit 0 -> 0 or 1,  bit 1 -> 2 or 3.
4068 (define_expand "sse2_shufpd"
4069 [(match_operand:V2DF 0 "register_operand" "")
4070 (match_operand:V2DF 1 "register_operand" "")
4071 (match_operand:V2DF 2 "nonimmediate_operand" "")
4072 (match_operand:SI 3 "const_int_operand" "")]
4073 "TARGET_SSE2"
4074 {
4075 int mask = INTVAL (operands[3]);
4076 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4077 GEN_INT (mask & 1),
4078 GEN_INT (mask & 2 ? 3 : 2)));
4079 DONE;
4080 })
4081
4082 ;; Modes handled by vec_extract_even/odd pattern.
;; V4SF is unconditional; the integer modes and V2DF require TARGET_SSE2,
;; and the 256-bit float modes V8SF and V4DF require TARGET_AVX.
4083 (define_mode_iterator VEC_EXTRACT_EVENODD_MODE
4084 [(V16QI "TARGET_SSE2")
4085 (V8HI "TARGET_SSE2")
4086 (V4SI "TARGET_SSE2")
4087 (V2DI "TARGET_SSE2")
4088 (V8SF "TARGET_AVX") V4SF
4089 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
4090
;; Named vec_extract_even expander for VEC_EXTRACT_EVENODD_MODE.
;; Delegates to ix86_expand_vec_extract_even_odd with a final argument
;; of 0 (the odd variant of this pattern passes 1).
4091 (define_expand "vec_extract_even<mode>"
4092 [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "")
4093 (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "")
4094 (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")]
4095 "TARGET_SSE"
4096 {
4097 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
4098 DONE;
4099 })
4100
;; Named vec_extract_odd expander for VEC_EXTRACT_EVENODD_MODE.
;; Delegates to ix86_expand_vec_extract_even_odd with a final argument
;; of 1 (the even variant of this pattern passes 0).
4101 (define_expand "vec_extract_odd<mode>"
4102 [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "")
4103 (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "")
4104 (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")]
4105 "TARGET_SSE"
4106 {
4107 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4108 DONE;
4109 })
4110
4111 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
4112
;; Interleave the high elements of two V2DI vectors: { op1[1], op2[1] }.
;; Alternative 0 is the two-operand SSE2 punpckhqdq (op1 tied to the
;; destination); alternative 1 is the three-operand AVX vpunpckhqdq.
4113 (define_insn "vec_interleave_highv2di"
4114 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4115 (vec_select:V2DI
4116 (vec_concat:V4DI
4117 (match_operand:V2DI 1 "register_operand" "0,x")
4118 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4119 (parallel [(const_int 1)
4120 (const_int 3)])))]
4121 "TARGET_SSE2"
4122 "@
4123 punpckhqdq\t{%2, %0|%0, %2}
4124 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4125 [(set_attr "isa" "noavx,avx")
4126 (set_attr "type" "sselog")
4127 (set_attr "prefix_data16" "1,*")
4128 (set_attr "prefix" "orig,vex")
4129 (set_attr "mode" "TI")])
4130
;; Interleave the low elements of two V2DI vectors: { op1[0], op2[0] }.
;; Alternative 0 is the two-operand SSE2 punpcklqdq (op1 tied to the
;; destination); alternative 1 is the three-operand AVX vpunpcklqdq.
4131 (define_insn "vec_interleave_lowv2di"
4132 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4133 (vec_select:V2DI
4134 (vec_concat:V4DI
4135 (match_operand:V2DI 1 "register_operand" "0,x")
4136 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4137 (parallel [(const_int 0)
4138 (const_int 2)])))]
4139 "TARGET_SSE2"
4140 "@
4141 punpcklqdq\t{%2, %0|%0, %2}
4142 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4143 [(set_attr "isa" "noavx,avx")
4144 (set_attr "type" "sselog")
4145 (set_attr "prefix_data16" "1,*")
4146 (set_attr "prefix" "orig,vex")
4147 (set_attr "mode" "TI")])
4148
;; 128-bit shufpd for the modes in VI8F_128, with the shuffle spelled out
;; as two element selectors into concat (op1, op2): operand 3 in 0..1
;; picks from operand 1, operand 4 in 2..3 picks from operand 2.  At
;; output time the selectors are folded back into the 2-bit immediate,
;; which overwrites operands[3] for printing.  Alternative 0 is the
;; two-operand SSE form, alternative 1 the three-operand AVX form.
4149 (define_insn "sse2_shufpd_<mode>"
4150 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4151 (vec_select:VI8F_128
4152 (vec_concat:<ssedoublevecmode>
4153 (match_operand:VI8F_128 1 "register_operand" "0,x")
4154 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4155 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4156 (match_operand 4 "const_2_to_3_operand" "")])))]
4157 "TARGET_SSE2"
4158 {
4159 int mask;
4160 mask = INTVAL (operands[3]);
4161 mask |= (INTVAL (operands[4]) - 2) << 1;
4162 operands[3] = GEN_INT (mask);
4163
4164 switch (which_alternative)
4165 {
4166 case 0:
4167 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4168 case 1:
4169 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4170 default:
4171 gcc_unreachable ();
4172 }
4173 }
4174 [(set_attr "isa" "noavx,avx")
4175 (set_attr "type" "sselog")
4176 (set_attr "length_immediate" "1")
4177 (set_attr "prefix" "orig,vex")
4178 (set_attr "mode" "V2DF")])
4179
4180 ;; Avoid combining registers from different units in a single alternative,
4181 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract the high element (index 1) of a V2DF into a DF destination.
;;   0    (v)movhpd   store from an SSE register to memory
;;   1    unpckhpd    SSE register in place (source tied to destination)
;;   2    vunpckhpd   AVX form; %d1 presumably prints operand 1 twice --
;;                    confirm in print_operand
;;   3-5  "#"         source is offsettable memory; destination is an SSE,
;;                    x87 or general register.  Not output directly; left
;;                    for a post-reload split.
;; The insn condition rejects memory-to-memory.  prefix_data16 is 1 only
;; for alternative 0 when TARGET_AVX is off.
4182 (define_insn "sse2_storehpd"
4183 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4184 (vec_select:DF
4185 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4186 (parallel [(const_int 1)])))]
4187 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4188 "@
4189 %vmovhpd\t{%1, %0|%0, %1}
4190 unpckhpd\t%0, %0
4191 vunpckhpd\t{%d1, %0|%0, %d1}
4192 #
4193 #
4194 #"
4195 [(set_attr "isa" "base,noavx,avx,base,base,base")
4196 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4197 (set (attr "prefix_data16")
4198 (if_then_else
4199 (and (eq_attr "alternative" "0")
4200 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
4201 (const_string "1")
4202 (const_string "*")))
4203 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4204 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
4205
;; After reload, extracting element 1 of a V2DF in memory into a register
;; becomes a plain DFmode load from byte offset 8 of that memory.
4206 (define_split
4207 [(set (match_operand:DF 0 "register_operand" "")
4208 (vec_select:DF
4209 (match_operand:V2DF 1 "memory_operand" "")
4210 (parallel [(const_int 1)])))]
4211 "TARGET_SSE2 && reload_completed"
4212 [(set (match_dup 0) (match_dup 1))]
4213 "operands[1] = adjust_address (operands[1], DFmode, 8);")
4214
4215 ;; Avoid combining registers from different units in a single alternative,
4216 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract the low element (index 0) of a V2DF into a DF destination.
;;   0    (v)movlpd  store from an SSE register to memory
;;   1-4  "#"        register destination (SSE, x87 or general) from an
;;                   SSE register or memory; not output directly, left
;;                   for a post-reload split into a scalar move
;; The insn condition rejects memory-to-memory.
4217 (define_insn "sse2_storelpd"
4218 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4219 (vec_select:DF
4220 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4221 (parallel [(const_int 0)])))]
4222 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4223 "@
4224 %vmovlpd\t{%1, %0|%0, %1}
4225 #
4226 #
4227 #
4228 #"
4229 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4230 (set_attr "prefix_data16" "1,*,*,*,*")
4231 (set_attr "prefix" "maybe_vex")
4232 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
4233
;; After reload, extracting element 0 of a V2DF into a register becomes a
;; plain DFmode move.  A register source is re-viewed as the same hard
;; register in DFmode; any other source goes through gen_lowpart.
4234 (define_split
4235 [(set (match_operand:DF 0 "register_operand" "")
4236 (vec_select:DF
4237 (match_operand:V2DF 1 "nonimmediate_operand" "")
4238 (parallel [(const_int 0)])))]
4239 "TARGET_SSE2 && reload_completed"
4240 [(const_int 0)]
4241 {
4242 rtx op1 = operands[1];
4243 if (REG_P (op1))
4244 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4245 else
4246 op1 = gen_lowpart (DFmode, op1);
4247 emit_move_insn (operands[0], op1);
4248 DONE;
4249 })
4250
;; Expander wrapper around sse2_loadhpd: build { op1[0], op2 }.  The
;; operands are first passed through ix86_fixup_binary_operands, which
;; returns the destination actually to be used; if that differs from
;; operand 0 the result is copied into operand 0 afterwards.
4251 (define_expand "sse2_loadhpd_exp"
4252 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4253 (vec_concat:V2DF
4254 (vec_select:DF
4255 (match_operand:V2DF 1 "nonimmediate_operand" "")
4256 (parallel [(const_int 0)]))
4257 (match_operand:DF 2 "nonimmediate_operand" "")))]
4258 "TARGET_SSE2"
4259 {
4260 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4261
4262 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4263
4264 /* Fix up the destination if needed. */
4265 if (dst != operands[0])
4266 emit_move_insn (operands[0], dst);
4267
4268 DONE;
4269 })
4270
4271 ;; Avoid combining registers from different units in a single alternative,
4272 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the high element of V2DF operand 1 with DF operand 2, keeping
;; element 0 of operand 1: the result is { op1[0], op2 }.
;;
;; Alternatives:
;;   0/1  (v)movhpd    operand 2 loaded from memory into the high half
;;   2/3  (v)unpcklpd  operand 2 in an SSE register
;;   4-6  "#"          destination is offsettable memory tied to operand 1
;;                     and operand 2 is an SSE, x87 or general register;
;;                     split after reload into a DFmode store at byte
;;                     offset 8
;;
;; There is deliberately no shufpd alternative for the case where operand 2
;; is tied to the destination.  shufpd always takes its low result element
;; from the destination register and its high result element from the
;; source, so "shufpd $1, %1, %0" would produce { op0[1], op1[0] } rather
;; than the required { op1[0], op2 }, and no immediate value can produce
;; the required result.  Alternatives 2/3 already cover the
;; register/register case.
4273 (define_insn "sse2_loadhpd"
4274 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4275 "=x,x,x,x,o,o ,o")
4276 (vec_concat:V2DF
4277 (vec_select:DF
4278 (match_operand:V2DF 1 "nonimmediate_operand"
4279 " 0,x,0,x,0,0 ,0")
4280 (parallel [(const_int 0)]))
4281 (match_operand:DF 2 "nonimmediate_operand"
4282 " m,m,x,x,x,*f,r")))]
4283 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4284 "@
4285 movhpd\t{%2, %0|%0, %2}
4286 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4287 unpcklpd\t{%2, %0|%0, %2}
4288 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4290 #
4291 #
4292 #"
4293 [(set_attr "isa" "noavx,avx,noavx,avx,base,base,base")
4294 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4295 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4297 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4298 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
4299
;; After reload, replacing the high element of a V2DF in memory with a DF
;; register (the low element is the memory's own element 0, so it does
;; not change) becomes a single DFmode store at byte offset 8.
4300 (define_split
4301 [(set (match_operand:V2DF 0 "memory_operand" "")
4302 (vec_concat:V2DF
4303 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4304 (match_operand:DF 1 "register_operand" "")))]
4305 "TARGET_SSE2 && reload_completed"
4306 [(set (match_dup 0) (match_dup 1))]
4307 "operands[0] = adjust_address (operands[0], DFmode, 8);")
4308
;; Expander wrapper around sse2_loadlpd: build { op2, op1[1] }.  The
;; operands are first passed through ix86_fixup_binary_operands, which
;; returns the destination actually to be used; if that differs from
;; operand 0 the result is copied into operand 0 afterwards.
4309 (define_expand "sse2_loadlpd_exp"
4310 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4311 (vec_concat:V2DF
4312 (match_operand:DF 2 "nonimmediate_operand" "")
4313 (vec_select:DF
4314 (match_operand:V2DF 1 "nonimmediate_operand" "")
4315 (parallel [(const_int 1)]))))]
4316 "TARGET_SSE2"
4317 {
4318 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4319
4320 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4321
4322 /* Fix up the destination if needed. */
4323 if (dst != operands[0])
4324 emit_move_insn (operands[0], dst);
4325
4326 DONE;
4327 })
4328
4329 ;; Avoid combining registers from different units in a single alternative,
4330 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the low element of V2DF operand 1 with DF operand 2, keeping
;; element 1 of operand 1: the result is { op2, op1[1] }.
;; Alternatives, in order:
;;   0     (v)movsd   op2 from memory, op1 is a constant matching "C"
;;                    (presumably the all-zeros vector -- confirm)
;;   1/2   (v)movlpd  op2 from memory into the low half
;;   3/4   (v)movsd   op2 in an SSE register
;;   5     shufpd $2  op2 tied to the destination, op1 in a register
;;   6/7   (v)movhpd  op1 in offsettable memory: load its high double
;;                    (%H1) into the high half
;;   8-10  "#"        destination is memory tied to op1; op2 is an SSE,
;;                    x87 or general register; left for a post-reload
;;                    split
4331 (define_insn "sse2_loadlpd"
4332 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4333 "=x,x,x,x,x,x,x,x,m,m ,m")
4334 (vec_concat:V2DF
4335 (match_operand:DF 2 "nonimmediate_operand"
4336 " m,m,m,x,x,0,0,x,x,*f,r")
4337 (vec_select:DF
4338 (match_operand:V2DF 1 "vector_move_operand"
4339 " C,0,x,0,x,x,o,o,0,0 ,0")
4340 (parallel [(const_int 1)]))))]
4341 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4342 "@
4343 %vmovsd\t{%2, %0|%0, %2}
4344 movlpd\t{%2, %0|%0, %2}
4345 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4346 movsd\t{%2, %0|%0, %2}
4347 vmovsd\t{%2, %1, %0|%0, %1, %2}
4348 shufpd\t{$2, %1, %0|%0, %1, 2}
4349 movhpd\t{%H1, %0|%0, %H1}
4350 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4351 #
4352 #
4353 #"
4354 [(set_attr "isa" "base,noavx,avx,noavx,avx,noavx,noavx,avx,base,base,base")
4355 (set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov,fmov,imov")
4356 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4357 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4358 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4359 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
4360
;; After reload, replacing the LOW element of a V2DF in memory with a DF
;; register becomes a single DFmode store.  The high element is the
;; memory's own element 1 (match_dup 0), so it must be left untouched:
;; the store therefore goes to byte offset 0.  (Offset 8 would overwrite
;; the high element and leave the low element stale.)
4361 (define_split
4362 [(set (match_operand:V2DF 0 "memory_operand" "")
4363 (vec_concat:V2DF
4364 (match_operand:DF 1 "register_operand" "")
4365 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4366 "TARGET_SSE2 && reload_completed"
4367 [(set (match_dup 0) (match_dup 1))]
4368 "operands[0] = adjust_address (operands[0], DFmode, 0);")
4369
4370 ;; Not sure these two are ever used, but it doesn't hurt to have
4371 ;; them. -aoliva
;; SSE1-only (no SSE2) extraction of element 1 of a V2DF, done with
;; single-precision moves of the 64-bit half:
;;   0  movhps   store the high half of a register to memory
;;   1  movhlps  register to register
;;   2  movlps   load from the upper half (%H1) of offsettable memory
;; The insn condition rejects memory-to-memory.
4372 (define_insn "*vec_extractv2df_1_sse"
4373 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4374 (vec_select:DF
4375 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4376 (parallel [(const_int 1)])))]
4377 "!TARGET_SSE2 && TARGET_SSE
4378 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4379 "@
4380 movhps\t{%1, %0|%0, %1}
4381 movhlps\t{%1, %0|%0, %1}
4382 movlps\t{%H1, %0|%0, %H1}"
4383 [(set_attr "type" "ssemov")
4384 (set_attr "mode" "V2SF,V4SF,V2SF")])
4385
;; SSE1-only (no SSE2) extraction of element 0 of a V2DF, done with
;; single-precision moves:
;;   0  movlps  store the low half of a register to memory
;;   1  movaps  register to register (whole-register copy)
;;   2  movlps  load the low half from memory
;; The insn condition rejects memory-to-memory.
4386 (define_insn "*vec_extractv2df_0_sse"
4387 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4388 (vec_select:DF
4389 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4390 (parallel [(const_int 0)])))]
4391 "!TARGET_SSE2 && TARGET_SSE
4392 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4393 "@
4394 movlps\t{%1, %0|%0, %1}
4395 movaps\t{%1, %0|%0, %1}
4396 movlps\t{%1, %0|%0, %1}"
4397 [(set_attr "type" "ssemov")
4398 (set_attr "mode" "V2SF,V4SF,V2SF")])
4399
;; movsd-style merge: vec_merge with mask 1 takes element 0 from operand 2
;; and element 1 from operand 1, i.e. the result is { op2[0], op1[1] }.
;; Alternatives, in order:
;;   0/1  (v)movsd    both inputs in SSE registers
;;   2/3  (v)movlpd   op2 in memory: load its low double into the low half
;;   4    (v)movlpd   destination is memory tied to op1: store op2's low
;;                    element
;;   5    shufpd $2   op2 tied to the destination, op1 in a register
;;   6/7  (v)movhps   op1 in offsettable memory: load its upper half (%H1)
;;                    into the high half
;;   8    (v)movhps   destination is offsettable memory tied to op2: store
;;                    op1 to its upper half (%H0)
;; prefix_data16 is 1 only for alternatives 2 and 4 when TARGET_AVX is off.
4400 (define_insn "sse2_movsd"
4401 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4402 (vec_merge:V2DF
4403 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4404 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4405 (const_int 1)))]
4406 "TARGET_SSE2"
4407 "@
4408 movsd\t{%2, %0|%0, %2}
4409 vmovsd\t{%2, %1, %0|%0, %1, %2}
4410 movlpd\t{%2, %0|%0, %2}
4411 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4412 %vmovlpd\t{%2, %0|%0, %2}
4413 shufpd\t{$2, %1, %0|%0, %1, 2}
4414 movhps\t{%H1, %0|%0, %H1}
4415 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4416 %vmovhps\t{%1, %H0|%H0, %1}"
4417 [(set_attr "isa" "noavx,avx,noavx,avx,base,noavx,noavx,avx,base")
4418 (set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4419 (set (attr "prefix_data16")
4420 (if_then_else
4421 (and (eq_attr "alternative" "2,4")
4422 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
4423 (const_string "1")
4424 (const_string "*")))
4425 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4426 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4427 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
4428
;; Broadcast a DF scalar (SSE register or memory) into both elements of a
;; V2DF register with (v)movddup.  Requires TARGET_SSE3.
4429 (define_insn "*vec_dupv2df_sse3"
4430 [(set (match_operand:V2DF 0 "register_operand" "=x")
4431 (vec_duplicate:V2DF
4432 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4433 "TARGET_SSE3"
4434 "%vmovddup\t{%1, %0|%0, %1}"
4435 [(set_attr "type" "sselog1")
4436 (set_attr "prefix" "maybe_vex")
4437 (set_attr "mode" "DF")])
4438
;; SSE2 broadcast of a DF scalar into both elements of a V2DF register.
;; The input is tied to the destination ("0") and duplicated in place
;; with unpcklpd %0, %0.
4439 (define_insn "vec_dupv2df"
4440 [(set (match_operand:V2DF 0 "register_operand" "=x")
4441 (vec_duplicate:V2DF
4442 (match_operand:DF 1 "register_operand" "0")))]
4443 "TARGET_SSE2"
4444 "unpcklpd\t%0, %0"
4445 [(set_attr "type" "sselog1")
4446 (set_attr "mode" "V2DF")])
4447
;; Concatenation of a DF scalar with itself, i.e. the same broadcast as
;; vec_duplicate but written as vec_concat (op1, op1).  Emitted as
;; (v)movddup; requires TARGET_SSE3.
4448 (define_insn "*vec_concatv2df_sse3"
4449 [(set (match_operand:V2DF 0 "register_operand" "=x")
4450 (vec_concat:V2DF
4451 (match_operand:DF 1 "nonimmediate_operand" "xm")
4452 (match_dup 1)))]
4453 "TARGET_SSE3"
4454 "%vmovddup\t{%1, %0|%0, %1}"
4455 [(set_attr "type" "sselog1")
4456 (set_attr "prefix" "maybe_vex")
4457 (set_attr "mode" "DF")])
4458
;; Build a V2DF as { op1, op2 } from two DF scalars.
;; Alternatives, in order:
;;   0/1  (v)unpcklpd  both scalars in SSE registers
;;   2/3  (v)movhpd    op2 loaded from memory into the high half
;;   4    (v)movsd     op1 loaded from memory, op2 is a constant matching
;;                     "C" (presumably zero -- confirm)
;;   5    movlhps      register/register, using a single-precision move
;;   6    movhps       op2 from memory, using a single-precision move
;; NOTE(review): alternatives 0, 2 and 4 use the "Y2" register class while
;; 5 and 6 use plain "x"; presumably Y2 is the SSE2-only class, leaving
;; 5/6 as the SSE1 fallbacks -- confirm in constraints.md.
4459 (define_insn "*vec_concatv2df"
4460 [(set (match_operand:V2DF 0 "register_operand" "=Y2,x,Y2,x,Y2,x,x")
4461 (vec_concat:V2DF
4462 (match_operand:DF 1 "nonimmediate_operand" " 0 ,x,0 ,x,m ,0,0")
4463 (match_operand:DF 2 "vector_move_operand" " Y2,x,m ,m,C ,x,m")))]
4464 "TARGET_SSE"
4465 "@
4466 unpcklpd\t{%2, %0|%0, %2}
4467 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4468 movhpd\t{%2, %0|%0, %2}
4469 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4470 %vmovsd\t{%1, %0|%0, %1}
4471 movlhps\t{%2, %0|%0, %2}
4472 movhps\t{%2, %0|%0, %2}"
4473 [(set_attr "isa" "noavx,avx,noavx,avx,base,noavx,noavx")
4474 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov,ssemov,ssemov")
4475 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4476 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4477 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,DF,V4SF,V2SF")])
4478
4479 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4480 ;;
4481 ;; Parallel integral arithmetic
4482 ;;
4483 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4484
;; Integer vector negation for the VI_128 modes, expressed as 0 - op1.
;; Operand 2 is created here as a register holding the mode's zero
;; constant (CONST0_RTX).
4485 (define_expand "neg<mode>2"
4486 [(set (match_operand:VI_128 0 "register_operand" "")
4487 (minus:VI_128
4488 (match_dup 2)
4489 (match_operand:VI_128 1 "nonimmediate_operand" "")))]
4490 "TARGET_SSE2"
4491 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
4492
;; Expander for integer vector add/subtract (the plusminus code iterator)
;; on the VI_128 modes.  Operands are legitimized in place by
;; ix86_fixup_binary_operands_no_copy before the set is emitted.
4493 (define_expand "<plusminus_insn><mode>3"
4494 [(set (match_operand:VI_128 0 "register_operand" "")
4495 (plusminus:VI_128
4496 (match_operand:VI_128 1 "nonimmediate_operand" "")
4497 (match_operand:VI_128 2 "nonimmediate_operand" "")))]
4498 "TARGET_SSE2"
4499 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
4500
;; Matcher for integer vector add/subtract on the VI_128 modes.
;; Alternative 0 is the two-operand SSE2 form (op1 tied to the
;; destination), alternative 1 the three-operand AVX form.  The mnemonic
;; is assembled from <plusminus_mnemonic> and <ssemodesuffix>.
;; <comm> in the constraint presumably expands to "%" for the commutative
;; code only -- confirm at the code attribute's definition.
4501 (define_insn "*<plusminus_insn><mode>3"
4502 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
4503 (plusminus:VI_128
4504 (match_operand:VI_128 1 "nonimmediate_operand" "<comm>0,x")
4505 (match_operand:VI_128 2 "nonimmediate_operand" "xm,xm")))]
4506 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4507 "@
4508 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4509 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4510 [(set_attr "isa" "noavx,avx")
4511 (set_attr "type" "sseiadd")
4512 (set_attr "prefix_data16" "1,*")
4513 (set_attr "prefix" "orig,vex")
4514 (set_attr "mode" "TI")])
4515
;; Expander for the saturating add/subtract codes (the sat_plusminus
;; iterator) on the VI12_128 modes.  Operands are legitimized in place by
;; ix86_fixup_binary_operands_no_copy before the set is emitted.
4516 (define_expand "sse2_<plusminus_insn><mode>3"
4517 [(set (match_operand:VI12_128 0 "register_operand" "")
4518 (sat_plusminus:VI12_128
4519 (match_operand:VI12_128 1 "nonimmediate_operand" "")
4520 (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
4521 "TARGET_SSE2"
4522 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
4523
;; Matcher for the saturating add/subtract codes on the VI12_128 modes.
;; Alternative 0 is the two-operand SSE2 form (op1 tied to the
;; destination), alternative 1 the three-operand AVX form.  The mnemonic
;; is assembled from <plusminus_mnemonic> and <ssemodesuffix>.
4524 (define_insn "*sse2_<plusminus_insn><mode>3"
4525 [(set (match_operand:VI12_128 0 "register_operand" "=x,x")
4526 (sat_plusminus:VI12_128
4527 (match_operand:VI12_128 1 "nonimmediate_operand" "<comm>0,x")
4528 (match_operand:VI12_128 2 "nonimmediate_operand" "xm,xm")))]
4529 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4530 "@
4531 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4532 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4533 [(set_attr "isa" "noavx,avx")
4534 (set_attr "type" "sseiadd")
4535 (set_attr "prefix_data16" "1,*")
4536 (set_attr "prefix" "orig,vex")
4537 (set_attr "mode" "TI")])
4538
;; V16QI multiply, synthesized from two V8HI multiplies.  The insn is
;; only valid while new pseudos can be created and is split
;; unconditionally ("&& 1"): each input is widened by interleaving it
;; with itself, the halves are multiplied as V8HI, and the even bytes of
;; the two products are gathered into operand 0.
4539 (define_insn_and_split "mulv16qi3"
4540 [(set (match_operand:V16QI 0 "register_operand" "")
4541 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
4542 (match_operand:V16QI 2 "register_operand" "")))]
4543 "TARGET_SSE2
4544 && can_create_pseudo_p ()"
4545 "#"
4546 "&& 1"
4547 [(const_int 0)]
4548 {
4549 rtx t[6];
4550 int i;
4551
4552 for (i = 0; i < 6; ++i)
4553 t[i] = gen_reg_rtx (V16QImode);
4554
4555 /* Unpack data such that we've got a source byte in each low byte of
4556 each word. We don't care what goes into the high byte of each word.
4557 Rather than trying to get zero in there, most convenient is to let
4558 it be a copy of the low byte. */
4559 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
4560 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
4561 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
4562 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
4563
4564 /* Multiply words. The end-of-line annotations here give a picture of what
4565 the output of that instruction looks like. Dot means don't care; the
4566 letters are the bytes of the result with A being the most significant. */
4567 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
4568 gen_lowpart (V8HImode, t[0]),
4569 gen_lowpart (V8HImode, t[1])));
4570 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
4571 gen_lowpart (V8HImode, t[2]),
4572 gen_lowpart (V8HImode, t[3])));
4573
4574 /* Extract the even bytes and merge them back together. */
4575 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
4576 DONE;
4577 })
4578
;; Expander for the element-wise V8HI multiply.  Operands are legitimized
;; in place by ix86_fixup_binary_operands_no_copy before the set is
;; emitted.
4579 (define_expand "mulv8hi3"
4580 [(set (match_operand:V8HI 0 "register_operand" "")
4581 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
4582 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4583 "TARGET_SSE2"
4584 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
4585
;; Matcher for the element-wise V8HI multiply (low 16 bits of each
;; product): pmullw in the two-operand SSE2 form, vpmullw in the
;; three-operand AVX form.  Operand 1 is marked commutative ("%").
4586 (define_insn "*mulv8hi3"
4587 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
4588 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
4589 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
4590 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4591 "@
4592 pmullw\t{%2, %0|%0, %2}
4593 vpmullw\t{%2, %1, %0|%0, %1, %2}"
4594 [(set_attr "isa" "noavx,avx")
4595 (set_attr "type" "sseimul")
4596 (set_attr "prefix_data16" "1,*")
4597 (set_attr "prefix" "orig,vex")
4598 (set_attr "mode" "TI")])
4599
;; Expander for the high-part V8HI multiply, signed or unsigned via the
;; any_extend iterator: both inputs are extended to V8SI, multiplied,
;; shifted right logically by 16 and truncated back to V8HI.  Operands
;; are legitimized in place by ix86_fixup_binary_operands_no_copy.
4600 (define_expand "<s>mulv8hi3_highpart"
4601 [(set (match_operand:V8HI 0 "register_operand" "")
4602 (truncate:V8HI
4603 (lshiftrt:V8SI
4604 (mult:V8SI
4605 (any_extend:V8SI
4606 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4607 (any_extend:V8SI
4608 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4609 (const_int 16))))]
4610 "TARGET_SSE2"
4611 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
4612
;; Matcher for the high-part V8HI multiply: pmulh<u>w in the two-operand
;; SSE2 form, vpmulh<u>w in the three-operand AVX form, where <u> follows
;; the any_extend code.  Operand 1 is marked commutative ("%").
4613 (define_insn "*<s>mulv8hi3_highpart"
4614 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
4615 (truncate:V8HI
4616 (lshiftrt:V8SI
4617 (mult:V8SI
4618 (any_extend:V8SI
4619 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
4620 (any_extend:V8SI
4621 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
4622 (const_int 16))))]
4623 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4624 "@
4625 pmulh<u>w\t{%2, %0|%0, %2}
4626 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
4627 [(set_attr "isa" "noavx,avx")
4628 (set_attr "type" "sseimul")
4629 (set_attr "prefix_data16" "1,*")
4630 (set_attr "prefix" "orig,vex")
4631 (set_attr "mode" "TI")])
4632
;; Expander for the unsigned widening multiply of the even SI elements:
;; elements 0 and 2 of each V4SI input are zero-extended to DI and
;; multiplied, giving a V2DI.  Operands are legitimized in place by
;; ix86_fixup_binary_operands_no_copy (in V4SImode, the input mode).
4633 (define_expand "sse2_umulv2siv2di3"
4634 [(set (match_operand:V2DI 0 "register_operand" "")
4635 (mult:V2DI
4636 (zero_extend:V2DI
4637 (vec_select:V2SI
4638 (match_operand:V4SI 1 "nonimmediate_operand" "")
4639 (parallel [(const_int 0) (const_int 2)])))
4640 (zero_extend:V2DI
4641 (vec_select:V2SI
4642 (match_operand:V4SI 2 "nonimmediate_operand" "")
4643 (parallel [(const_int 0) (const_int 2)])))))]
4644 "TARGET_SSE2"
4645 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
4646
;; Instruction pattern for the unsigned even-element widening multiply
;; (elements 0 and 2 of each V4SI input, zero-extended, multiplied into
;; V2DI).  Printed as pmuludq ("noavx", operand 1 tied to operand 0) or
;; vpmuludq ("avx", three operands).  Operands 1 and 2 are commutative.
4647 (define_insn "*sse2_umulv2siv2di3"
4648 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4649 (mult:V2DI
4650 (zero_extend:V2DI
4651 (vec_select:V2SI
4652 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4653 (parallel [(const_int 0) (const_int 2)])))
4654 (zero_extend:V2DI
4655 (vec_select:V2SI
4656 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
4657 (parallel [(const_int 0) (const_int 2)])))))]
4658 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4659 "@
4660 pmuludq\t{%2, %0|%0, %2}
4661 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
4662 [(set_attr "isa" "noavx,avx")
4663 (set_attr "type" "sseimul")
4664 (set_attr "prefix_data16" "1,*")
4665 (set_attr "prefix" "orig,vex")
4666 (set_attr "mode" "TI")])
4667
;; Expander for the signed widening multiply V4SI x V4SI -> V2DI.
;; Same shape as sse2_umulv2siv2di3 but the selected elements (0 and 2)
;; are sign-extended, and the pattern requires TARGET_SSE4_1.
4668 (define_expand "sse4_1_mulv2siv2di3"
4669 [(set (match_operand:V2DI 0 "register_operand" "")
4670 (mult:V2DI
4671 (sign_extend:V2DI
4672 (vec_select:V2SI
4673 (match_operand:V4SI 1 "nonimmediate_operand" "")
4674 (parallel [(const_int 0) (const_int 2)])))
4675 (sign_extend:V2DI
4676 (vec_select:V2SI
4677 (match_operand:V4SI 2 "nonimmediate_operand" "")
4678 (parallel [(const_int 0) (const_int 2)])))))]
4679 "TARGET_SSE4_1"
4680 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
4681
;; Instruction pattern for the signed even-element widening multiply
;; (elements 0 and 2, sign-extended, multiplied into V2DI).  Printed as
;; pmuldq ("noavx") or vpmuldq ("avx").  Operands 1 and 2 are commutative.
;; Unlike the SSE2 unsigned variant this one also sets prefix_extra to 1.
4682 (define_insn "*sse4_1_mulv2siv2di3"
4683 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4684 (mult:V2DI
4685 (sign_extend:V2DI
4686 (vec_select:V2SI
4687 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4688 (parallel [(const_int 0) (const_int 2)])))
4689 (sign_extend:V2DI
4690 (vec_select:V2SI
4691 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
4692 (parallel [(const_int 0) (const_int 2)])))))]
4693 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4694 "@
4695 pmuldq\t{%2, %0|%0, %2}
4696 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
4697 [(set_attr "isa" "noavx,avx")
4698 (set_attr "type" "sseimul")
4699 (set_attr "prefix_data16" "1,*")
4700 (set_attr "prefix_extra" "1")
4701 (set_attr "prefix" "orig,vex")
4702 (set_attr "mode" "TI")])
4703
;; Expander for the multiply-and-add-adjacent-pairs operation on V8HI
;; inputs producing V4SI.  The RTL is the sum of two V4SI products:
;;   - sign-extended even elements (0,2,4,6) of operand 1 times those of
;;     operand 2, and
;;   - sign-extended odd elements (1,3,5,7) of operand 1 times those of
;;     operand 2 (referenced again through match_dup).
;; Requires TARGET_SSE2.
4704 (define_expand "sse2_pmaddwd"
4705 [(set (match_operand:V4SI 0 "register_operand" "")
4706 (plus:V4SI
4707 (mult:V4SI
4708 (sign_extend:V4SI
4709 (vec_select:V4HI
4710 (match_operand:V8HI 1 "nonimmediate_operand" "")
4711 (parallel [(const_int 0)
4712 (const_int 2)
4713 (const_int 4)
4714 (const_int 6)])))
4715 (sign_extend:V4SI
4716 (vec_select:V4HI
4717 (match_operand:V8HI 2 "nonimmediate_operand" "")
4718 (parallel [(const_int 0)
4719 (const_int 2)
4720 (const_int 4)
4721 (const_int 6)]))))
4722 (mult:V4SI
4723 (sign_extend:V4SI
4724 (vec_select:V4HI (match_dup 1)
4725 (parallel [(const_int 1)
4726 (const_int 3)
4727 (const_int 5)
4728 (const_int 7)])))
4729 (sign_extend:V4SI
4730 (vec_select:V4HI (match_dup 2)
4731 (parallel [(const_int 1)
4732 (const_int 3)
4733 (const_int 5)
4734 (const_int 7)]))))))]
4735 "TARGET_SSE2"
4736 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
4737
;; Instruction pattern matching the even-pairs-product plus odd-pairs-product
;; RTL built by the sse2_pmaddwd expander.  Printed as pmaddwd ("noavx",
;; operand 1 tied to operand 0) or vpmaddwd ("avx").  Operands 1 and 2 are
;; commutative.  Note the type is "sseiadd" with atom_unit "simul".
4738 (define_insn "*sse2_pmaddwd"
4739 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
4740 (plus:V4SI
4741 (mult:V4SI
4742 (sign_extend:V4SI
4743 (vec_select:V4HI
4744 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
4745 (parallel [(const_int 0)
4746 (const_int 2)
4747 (const_int 4)
4748 (const_int 6)])))
4749 (sign_extend:V4SI
4750 (vec_select:V4HI
4751 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
4752 (parallel [(const_int 0)
4753 (const_int 2)
4754 (const_int 4)
4755 (const_int 6)]))))
4756 (mult:V4SI
4757 (sign_extend:V4SI
4758 (vec_select:V4HI (match_dup 1)
4759 (parallel [(const_int 1)
4760 (const_int 3)
4761 (const_int 5)
4762 (const_int 7)])))
4763 (sign_extend:V4SI
4764 (vec_select:V4HI (match_dup 2)
4765 (parallel [(const_int 1)
4766 (const_int 3)
4767 (const_int 5)
4768 (const_int 7)]))))))]
4769 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4770 "@
4771 pmaddwd\t{%2, %0|%0, %2}
4772 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
4773 [(set_attr "isa" "noavx,avx")
4774 (set_attr "type" "sseiadd")
4775 (set_attr "atom_unit" "simul")
4776 (set_attr "prefix_data16" "1,*")
4777 (set_attr "prefix" "orig,vex")
4778 (set_attr "mode" "TI")])
4779
;; Named expander for the element-wise V4SI multiply (TARGET_SSE2).
;; All operands must be registers.  The operand fix-up is performed only
;; when TARGET_SSE4_1 or TARGET_AVX is set; the emitted (mult:V4SI ...) is
;; then matched by *sse4_1_mulv4si3, otherwise by the *sse2_mulv4si3
;; splitter below.
4780 (define_expand "mulv4si3"
4781 [(set (match_operand:V4SI 0 "register_operand" "")
4782 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
4783 (match_operand:V4SI 2 "register_operand" "")))]
4784 "TARGET_SSE2"
4785 {
4786 if (TARGET_SSE4_1 || TARGET_AVX)
4787 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
4788 })
4789
;; Instruction pattern for the V4SI multiply when TARGET_SSE4_1 is set.
;; Printed as pmulld ("noavx", operand 1 tied to operand 0) or vpmulld
;; ("avx", three operands).  Operands 1 and 2 are commutative.
4790 (define_insn "*sse4_1_mulv4si3"
4791 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
4792 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4793 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")))]
4794 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4795 "@
4796 pmulld\t{%2, %0|%0, %2}
4797 vpmulld\t{%2, %1, %0|%0, %1, %2}"
4798 [(set_attr "isa" "noavx,avx")
4799 (set_attr "type" "sseimul")
4800 (set_attr "prefix_extra" "1")
4801 (set_attr "prefix" "orig,vex")
4802 (set_attr "mode" "TI")])
4803
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor AVX.
;; The insn itself is only a placeholder ("#") and is always split
;; ("&& 1") into two even-element widening multiplies whose low 32-bit
;; results are shuffled and interleaved back into one V4SI.  The split
;; allocates pseudos, hence the can_create_pseudo_p () condition.
(define_insn_and_split "*sse2_mulv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* Temporaries are allocated up front, in the order they are used.  */
  rtx even_prod   = gen_reg_rtx (V4SImode);
  rtx src1_down   = gen_reg_rtx (V4SImode);
  rtx src2_down   = gen_reg_rtx (V4SImode);
  rtx odd_prod    = gen_reg_rtx (V4SImode);
  rtx even_packed = gen_reg_rtx (V4SImode);
  rtx odd_packed  = gen_reg_rtx (V4SImode);
  rtx shift_bits  = GEN_INT (32);

  /* Widening products of elements 0 and 2.  */
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, even_prod),
                                     operands[1], operands[2]));

  /* Shift each whole input register down by one 32-bit element so that
     elements 1 and 3 land in slots 0 and 2.  For K8, at least, this is
     faster than using a shuffle.  */
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src1_down),
                                 gen_lowpart (V1TImode, operands[1]),
                                 shift_bits));
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src2_down),
                                 gen_lowpart (V1TImode, operands[2]),
                                 shift_bits));

  /* Widening products of (original) elements 1 and 3.  */
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, odd_prod),
                                     src1_down, src2_down));

  /* In each product vector bring element 2 down next to element 0;
     the contents of the upper two elements are don't-care.  */
  emit_insn (gen_sse2_pshufd_1 (even_packed, even_prod,
                                const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));
  emit_insn (gen_sse2_pshufd_1 (odd_packed, odd_prod,
                                const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));

  /* Interleave the low halves to rebuild the four results in order.  */
  emit_insn (gen_vec_interleave_lowv4si (operands[0],
                                         even_packed, odd_packed));
  DONE;
})
4856
;; V2DI multiply.  No single instruction is used; the insn is a
;; placeholder ("#") that is always split ("&& 1") into 32-bit partial
;; products.  The split allocates pseudos, hence can_create_pseudo_p ().
;;
;; Two strategies are emitted:
;;   TARGET_XOP: swap the 32-bit halves of op1, multiply element-wise
;;     against op2 to get the cross products, add adjacent pairs, shift
;;     the sum into the upper half and feed it as the addend of
;;     xop_pmacsdql.
;;   otherwise: low*low plus the two (low*high) cross products shifted
;;     left by 32, built from sse2_umulv2siv2di3.
(define_insn_and_split "mulv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx op0, op1, op2;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];

  if (TARGET_XOP)
    {
      rtx t1, t2, t3, t4;

      /* op1: A,B,C,D, op2: E,F,G,H */
      op1 = gen_lowpart (V4SImode, op1);
      op2 = gen_lowpart (V4SImode, op2);

      t1 = gen_reg_rtx (V4SImode);
      t2 = gen_reg_rtx (V4SImode);
      t3 = gen_reg_rtx (V2DImode);
      t4 = gen_reg_rtx (V2DImode);

      /* t1: B,A,D,C */
      emit_insn (gen_sse2_pshufd_1 (t1, op1,
                                    GEN_INT (1),
                                    GEN_INT (0),
                                    GEN_INT (3),
                                    GEN_INT (2)));

      /* t2: (B*E),(A*F),(D*G),(C*H) */
      emit_insn (gen_mulv4si3 (t2, t1, op2));

      /* t3: (B*E)+(A*F), (D*G)+(C*H) */
      emit_insn (gen_xop_phadddq (t3, t2));

      /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
      emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));

      /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
      emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
    }
  else
    {
      rtx t1, t2, t3, t4, t5, t6, thirtytwo;

      t1 = gen_reg_rtx (V2DImode);
      t2 = gen_reg_rtx (V2DImode);
      t3 = gen_reg_rtx (V2DImode);
      t4 = gen_reg_rtx (V2DImode);
      t5 = gen_reg_rtx (V2DImode);
      t6 = gen_reg_rtx (V2DImode);
      thirtytwo = GEN_INT (32);

      /* Multiply low parts.  */
      emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
                                         gen_lowpart (V4SImode, op2)));

      /* Shift input vectors right 32 bits so that the high part of each
         64-bit element sits where the widening multiply reads it.  */
      emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
      emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));

      /* Multiply high parts by low parts:
         t4 = low (op1) * high (op2), t5 = low (op2) * high (op1).  */
      emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
                                         gen_lowpart (V4SImode, t3)));
      emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
                                         gen_lowpart (V4SImode, t2)));

      /* Shift the cross products back up into the high half.  */
      emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
      emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));

      /* Add the three parts together.  */
      emit_insn (gen_addv2di3 (t6, t1, t4));
      emit_insn (gen_addv2di3 (op0, t6, t5));
    }
  DONE;
})
4938
;; Signed widening multiply of the high half: V8HI x V8HI -> V4SI.
;; The low 16 bits of every product come from mulv8hi3 and the high
;; 16 bits from smulv8hi3_highpart; interleaving the high halves of those
;; two vectors pairs them up, written through a V8HI view of operand 0.
(define_expand "vec_widen_smult_hi_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo = gen_reg_rtx (V8HImode);
  rtx prod_hi = gen_reg_rtx (V8HImode);
  rtx result = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (prod_lo, operands[1], operands[2]));
  emit_insn (gen_smulv8hi3_highpart (prod_hi, operands[1], operands[2]));
  emit_insn (gen_vec_interleave_highv8hi (result, prod_lo, prod_hi));
  DONE;
})
4958
;; Signed widening multiply of the low half: V8HI x V8HI -> V4SI.
;; Identical to vec_widen_smult_hi_v8hi except that the low halves of
;; the low-part and high-part product vectors are interleaved.
(define_expand "vec_widen_smult_lo_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo = gen_reg_rtx (V8HImode);
  rtx prod_hi = gen_reg_rtx (V8HImode);
  rtx result = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (prod_lo, operands[1], operands[2]));
  emit_insn (gen_smulv8hi3_highpart (prod_hi, operands[1], operands[2]));
  emit_insn (gen_vec_interleave_lowv8hi (result, prod_lo, prod_hi));
  DONE;
})
4978
;; Unsigned widening multiply of the high half: V8HI x V8HI -> V4SI.
;; The low 16 bits of every product come from mulv8hi3 and the high
;; 16 bits from umulv8hi3_highpart; the high halves of the two vectors
;; are interleaved into a V8HI view of operand 0.
(define_expand "vec_widen_umult_hi_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo = gen_reg_rtx (V8HImode);
  rtx prod_hi = gen_reg_rtx (V8HImode);
  rtx result = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (prod_lo, operands[1], operands[2]));
  emit_insn (gen_umulv8hi3_highpart (prod_hi, operands[1], operands[2]));
  emit_insn (gen_vec_interleave_highv8hi (result, prod_lo, prod_hi));
  DONE;
})
4998
;; Unsigned widening multiply of the low half: V8HI x V8HI -> V4SI.
;; Identical to vec_widen_umult_hi_v8hi except that the low halves of
;; the low-part and high-part product vectors are interleaved.
(define_expand "vec_widen_umult_lo_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo = gen_reg_rtx (V8HImode);
  rtx prod_hi = gen_reg_rtx (V8HImode);
  rtx result = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (prod_lo, operands[1], operands[2]));
  emit_insn (gen_umulv8hi3_highpart (prod_hi, operands[1], operands[2]));
  emit_insn (gen_vec_interleave_lowv8hi (result, prod_lo, prod_hi));
  DONE;
})
5018
;; Signed widening multiply of the high half: V4SI x V4SI -> V2DI.
;; Only provided for TARGET_XOP.  Both inputs are first permuted with the
;; element selection (0, 2, 1, 3) and then handed to
;; xop_mulv2div2di3_high.
(define_expand "vec_widen_smult_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx perm1 = gen_reg_rtx (V4SImode);
  rtx perm2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_sse2_pshufd_1 (perm1, operands[1],
                                GEN_INT (0), GEN_INT (2),
                                GEN_INT (1), GEN_INT (3)));
  emit_insn (gen_sse2_pshufd_1 (perm2, operands[2],
                                GEN_INT (0), GEN_INT (2),
                                GEN_INT (1), GEN_INT (3)));
  emit_insn (gen_xop_mulv2div2di3_high (operands[0], perm1, perm2));
  DONE;
})
5043
;; Signed widening multiply of the low half: V4SI x V4SI -> V2DI.
;; Only provided for TARGET_XOP.  Both inputs are first permuted with the
;; element selection (0, 2, 1, 3) and then handed to
;; xop_mulv2div2di3_low.
(define_expand "vec_widen_smult_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx perm1 = gen_reg_rtx (V4SImode);
  rtx perm2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_sse2_pshufd_1 (perm1, operands[1],
                                GEN_INT (0), GEN_INT (2),
                                GEN_INT (1), GEN_INT (3)));
  emit_insn (gen_sse2_pshufd_1 (perm2, operands[2],
                                GEN_INT (0), GEN_INT (2),
                                GEN_INT (1), GEN_INT (3)));
  emit_insn (gen_xop_mulv2div2di3_low (operands[0], perm1, perm2));
  DONE;
})
5068
;; Unsigned widening multiply of the high half: V4SI x V4SI -> V2DI.
;; Each input is interleaved with itself (high halves) so that the
;; wanted elements occupy slots 0 and 2, which are the ones
;; sse2_umulv2siv2di3 multiplies.
(define_expand "vec_widen_umult_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx dup1 = gen_reg_rtx (V4SImode);
  rtx dup2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_interleave_highv4si (dup1, operands[1], operands[1]));
  emit_insn (gen_vec_interleave_highv4si (dup2, operands[2], operands[2]));
  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], dup1, dup2));
  DONE;
})
5087
;; Unsigned widening multiply of the low half: V4SI x V4SI -> V2DI.
;; Each input is interleaved with itself (low halves) so that the
;; wanted elements occupy slots 0 and 2, which are the ones
;; sse2_umulv2siv2di3 multiplies.
(define_expand "vec_widen_umult_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx dup1 = gen_reg_rtx (V4SImode);
  rtx dup2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_interleave_lowv4si (dup1, operands[1], operands[1]));
  emit_insn (gen_vec_interleave_lowv4si (dup2, operands[2], operands[2]));
  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], dup1, dup2));
  DONE;
})
5106
;; Signed dot-product step: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2), where sse2_pmaddwd yields a V4SI of summed adjacent
;; products of the two V8HI inputs.
(define_expand "sdot_prodv8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")
   (match_operand:V4SI 3 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx pair_sums = gen_reg_rtx (V4SImode);

  emit_insn (gen_sse2_pmaddwd (pair_sums, operands[1], operands[2]));
  emit_insn (gen_addv4si3 (operands[0], operands[3], pair_sums));
  DONE;
})
5119
;; Unsigned dot-product step on V4SI inputs with a V2DI accumulator:
;; operand 0 = operand 3 + (even-element products) + (odd-element
;; products).  The odd elements are reached by shifting each whole input
;; register right by 32 bits before the second widening multiply.
(define_expand "udot_prodv4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")
   (match_operand:V2DI 3 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx even_acc, src1_down, src2_down, odd_prod;

  /* Products of elements 0 and 2, accumulated onto operand 3.  */
  even_acc = gen_reg_rtx (V2DImode);
  emit_insn (gen_sse2_umulv2siv2di3 (even_acc, operands[1], operands[2]));
  emit_insn (gen_addv2di3 (even_acc, even_acc, operands[3]));

  /* Bring elements 1 and 3 of both inputs down into slots 0 and 2.  */
  src1_down = gen_reg_rtx (V4SImode);
  src2_down = gen_reg_rtx (V4SImode);
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src1_down),
                                 gen_lowpart (V1TImode, operands[1]),
                                 GEN_INT (32)));
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src2_down),
                                 gen_lowpart (V1TImode, operands[2]),
                                 GEN_INT (32)));

  /* Products of (original) elements 1 and 3.  */
  odd_prod = gen_reg_rtx (V2DImode);
  emit_insn (gen_sse2_umulv2siv2di3 (odd_prod, src1_down, src2_down));

  emit_insn (gen_addv2di3 (operands[0], even_acc, odd_prod));
  DONE;
})
5148
;; Arithmetic right shift of every element of a VI24_128 vector by a
;; single SImode count (operand 2, constraint "xN": an SSE register or an
;; "N" constant).  Printed as psra<ssemodesuffix> ("noavx", operand 1 tied
;; to operand 0) or vpsra<ssemodesuffix> ("avx").  length_immediate is 1
;; when the count is a const_int and 0 otherwise.
5149 (define_insn "ashr<mode>3"
5150 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
5151 (ashiftrt:VI24_128
5152 (match_operand:VI24_128 1 "register_operand" "0,x")
5153 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5154 "TARGET_SSE2"
5155 "@
5156 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5157 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5158 [(set_attr "isa" "noavx,avx")
5159 (set_attr "type" "sseishft")
5160 (set (attr "length_immediate")
5161 (if_then_else (match_operand 2 "const_int_operand" "")
5162 (const_string "1")
5163 (const_string "0")))
5164 (set_attr "prefix_data16" "1,*")
5165 (set_attr "prefix" "orig,vex")
5166 (set_attr "mode" "TI")])
5167
;; Logical right shift of every element of a VI248_128 vector by a single
;; SImode count (operand 2, constraint "xN").  Printed as
;; psrl<ssemodesuffix> ("noavx", operand 1 tied to operand 0) or
;; vpsrl<ssemodesuffix> ("avx").  length_immediate is 1 when the count is
;; a const_int and 0 otherwise.
5168 (define_insn "lshr<mode>3"
5169 [(set (match_operand:VI248_128 0 "register_operand" "=x,x")
5170 (lshiftrt:VI248_128
5171 (match_operand:VI248_128 1 "register_operand" "0,x")
5172 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5173 "TARGET_SSE2"
5174 "@
5175 psrl<ssemodesuffix>\t{%2, %0|%0, %2}
5176 vpsrl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5177 [(set_attr "isa" "noavx,avx")
5178 (set_attr "type" "sseishft")
5179 (set (attr "length_immediate")
5180 (if_then_else (match_operand 2 "const_int_operand" "")
5181 (const_string "1")
5182 (const_string "0")))
5183 (set_attr "prefix_data16" "1,*")
5184 (set_attr "prefix" "orig,vex")
5185 (set_attr "mode" "TI")])
5186
;; Left shift of every element of a VI248_128 vector by a single SImode
;; count (operand 2, constraint "xN").  Printed as psll<ssemodesuffix>
;; ("noavx", operand 1 tied to operand 0) or vpsll<ssemodesuffix> ("avx").
;; length_immediate is 1 when the count is a const_int and 0 otherwise.
5187 (define_insn "ashl<mode>3"
5188 [(set (match_operand:VI248_128 0 "register_operand" "=x,x")
5189 (ashift:VI248_128
5190 (match_operand:VI248_128 1 "register_operand" "0,x")
5191 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5192 "TARGET_SSE2"
5193 "@
5194 psll<ssemodesuffix>\t{%2, %0|%0, %2}
5195 vpsll<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5196 [(set_attr "isa" "noavx,avx")
5197 (set_attr "type" "sseishft")
5198 (set (attr "length_immediate")
5199 (if_then_else (match_operand 2 "const_int_operand" "")
5200 (const_string "1")
5201 (const_string "0")))
5202 (set_attr "prefix_data16" "1,*")
5203 (set_attr "prefix" "orig,vex")
5204 (set_attr "mode" "TI")])
5205
;; Whole-vector left shift for any VI_128 mode.  The template is written
;; with VI_128 operands under an ashift:V1TI; the preparation statement
;; replaces operands 0 and 1 by their V1TImode lowparts so the emitted RTL
;; is a V1TI shift (see sse2_ashlv1ti3).  The count must satisfy
;; const_0_to_255_mul_8_operand.
5206 (define_expand "vec_shl_<mode>"
5207 [(set (match_operand:VI_128 0 "register_operand" "")
5208 (ashift:V1TI
5209 (match_operand:VI_128 1 "register_operand" "")
5210 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5211 "TARGET_SSE2"
5212 {
5213 operands[0] = gen_lowpart (V1TImode, operands[0]);
5214 operands[1] = gen_lowpart (V1TImode, operands[1]);
5215 })
5216
;; Left shift of the whole 128-bit register, expressed as a V1TI ashift.
;; Operand 2 is the shift amount in the RTL; the output code divides it
;; by 8 before printing, so the assembler immediate is one eighth of the
;; RTL count.  Alternative 0 prints pslldq (operand 1 tied to operand 0),
;; alternative 1 prints the three-operand vpslldq.
5217 (define_insn "sse2_ashlv1ti3"
5218 [(set (match_operand:V1TI 0 "register_operand" "=x,x")
5219 (ashift:V1TI
5220 (match_operand:V1TI 1 "register_operand" "0,x")
5221 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5222 "TARGET_SSE2"
5223 {
5224 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5225
5226 switch (which_alternative)
5227 {
5228 case 0:
5229 return "pslldq\t{%2, %0|%0, %2}";
5230 case 1:
5231 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5232 default:
5233 gcc_unreachable ();
5234 }
5235 }
5236 [(set_attr "isa" "noavx,avx")
5237 (set_attr "type" "sseishft")
5238 (set_attr "length_immediate" "1")
5239 (set_attr "prefix_data16" "1,*")
5240 (set_attr "prefix" "orig,vex")
5241 (set_attr "mode" "TI")])
5242
;; Whole-vector logical right shift for any VI_128 mode.  As with
;; vec_shl_<mode>, the preparation statement replaces operands 0 and 1 by
;; their V1TImode lowparts so the emitted RTL is a V1TI lshiftrt (see
;; sse2_lshrv1ti3).  The count must satisfy const_0_to_255_mul_8_operand.
5243 (define_expand "vec_shr_<mode>"
5244 [(set (match_operand:VI_128 0 "register_operand" "")
5245 (lshiftrt:V1TI
5246 (match_operand:VI_128 1 "register_operand" "")
5247 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5248 "TARGET_SSE2"
5249 {
5250 operands[0] = gen_lowpart (V1TImode, operands[0]);
5251 operands[1] = gen_lowpart (V1TImode, operands[1]);
5252 })
5253
;; Logical right shift of the whole 128-bit register, expressed as a V1TI
;; lshiftrt.  Operand 2 is the shift amount in the RTL; the output code
;; divides it by 8 before printing, so the assembler immediate is one
;; eighth of the RTL count (callers in this file pass 32 to move down by
;; one SImode element).  Alternative 0 prints psrldq (operand 1 tied to
;; operand 0), alternative 1 prints the three-operand vpsrldq.
5254 (define_insn "sse2_lshrv1ti3"
5255 [(set (match_operand:V1TI 0 "register_operand" "=x,x")
5256 (lshiftrt:V1TI
5257 (match_operand:V1TI 1 "register_operand" "0,x")
5258 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5259 "TARGET_SSE2"
5260 {
5261 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5262
5263 switch (which_alternative)
5264 {
5265 case 0:
5266 return "psrldq\t{%2, %0|%0, %2}";
5267 case 1:
5268 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5269 default:
5270 gcc_unreachable ();
5271 }
5272 }
5273 [(set_attr "isa" "noavx,avx")
5274 (set_attr "type" "sseishft")
5275 (set_attr "length_immediate" "1")
5276 (set_attr "atom_unit" "sishuf")
5277 (set_attr "prefix_data16" "1,*")
5278 (set_attr "prefix" "orig,vex")
5279 (set_attr "mode" "TI")])
5280
;; Signed max/min (smaxmin code iterator) for the VI14_128 modes, available
;; with TARGET_SSE4_1.  Printed as p<maxmin_int><ssemodesuffix> ("noavx",
;; operand 1 tied to operand 0) or the vp... three-operand form ("avx").
;; Operands 1 and 2 are commutative.
5281 (define_insn "*sse4_1_<code><mode>3"
5282 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
5283 (smaxmin:VI14_128
5284 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
5285 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
5286 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5287 "@
5288 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5289 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5290 [(set_attr "isa" "noavx,avx")
5291 (set_attr "type" "sseiadd")
5292 (set_attr "prefix_extra" "1,*")
5293 (set_attr "prefix" "orig,vex")
5294 (set_attr "mode" "TI")])
5295
;; Signed max/min (smaxmin code iterator) for V8HI, available with plain
;; TARGET_SSE2.  Printed as p<maxmin_int>w ("noavx", operand 1 tied to
;; operand 0) or vp<maxmin_int>w ("avx").  Operands 1 and 2 are
;; commutative.
;; NOTE(review): prefix_extra is "*,1" here, i.e. set for the VEX
;; alternative only; this differs from the SSE4.1 patterns - confirm that
;; this is the intended length accounting.
5296 (define_insn "*<code>v8hi3"
5297 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5298 (smaxmin:V8HI
5299 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5300 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
5301 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5302 "@
5303 p<maxmin_int>w\t{%2, %0|%0, %2}
5304 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
5305 [(set_attr "isa" "noavx,avx")
5306 (set_attr "type" "sseiadd")
5307 (set_attr "prefix_data16" "1,*")
5308 (set_attr "prefix_extra" "*,1")
5309 (set_attr "prefix" "orig,vex")
5310 (set_attr "mode" "TI")])
5311
;; Signed maximum for the VI14_128 modes.  With SSE4.1 the smax pattern
;; is emitted directly after operand fix-up.  Without it the maximum is
;; built as a vector conditional: (op1 > op2) ? op1 : op2.
(define_expand "smax<mode>3"
  [(set (match_operand:VI14_128 0 "register_operand" "")
        (smax:VI14_128 (match_operand:VI14_128 1 "register_operand" "")
                       (match_operand:VI14_128 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      /* Operand layout expected by ix86_expand_int_vcond: destination,
         value-if-true, value-if-false, comparison, compared operands.  */
      rtx vcond_ops[6];
      bool expanded;

      vcond_ops[0] = operands[0];
      vcond_ops[1] = operands[1];
      vcond_ops[2] = operands[2];
      vcond_ops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
      vcond_ops[4] = operands[1];
      vcond_ops[5] = operands[2];
      expanded = ix86_expand_int_vcond (vcond_ops);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
})
5336
;; Signed minimum for the VI14_128 modes.  With SSE4.1 the smin pattern
;; is emitted directly after operand fix-up.  Without it the minimum is
;; built as a vector conditional: (op1 > op2) ? op2 : op1.
(define_expand "smin<mode>3"
  [(set (match_operand:VI14_128 0 "register_operand" "")
        (smin:VI14_128 (match_operand:VI14_128 1 "register_operand" "")
                       (match_operand:VI14_128 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      /* Same comparison as smax, with the two selected values swapped.  */
      rtx vcond_ops[6];
      bool expanded;

      vcond_ops[0] = operands[0];
      vcond_ops[1] = operands[2];
      vcond_ops[2] = operands[1];
      vcond_ops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
      vcond_ops[4] = operands[1];
      vcond_ops[5] = operands[2];
      expanded = ix86_expand_int_vcond (vcond_ops);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
})
5361
;; Named expander for signed max/min (smaxmin code iterator) on V8HI,
;; enabled by TARGET_SSE2.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy and the pattern is then matched by
;; the *<code>v8hi3 insn.
5362 (define_expand "<code>v8hi3"
5363 [(set (match_operand:V8HI 0 "register_operand" "")
5364 (smaxmin:V8HI
5365 (match_operand:V8HI 1 "nonimmediate_operand" "")
5366 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5367 "TARGET_SSE2"
5368 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
5369
;; Signed maximum for V2DI, requiring TARGET_SSE4_2.  Always expanded as
;; a vector conditional: (op1 > op2) ? op1 : op2.
(define_expand "smaxv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE4_2"
{
  /* Layout: destination, value-if-true, value-if-false, comparison,
     compared operands.  */
  rtx vcond_ops[6];
  bool expanded;

  vcond_ops[0] = operands[0];
  vcond_ops[1] = operands[1];
  vcond_ops[2] = operands[2];
  vcond_ops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
  vcond_ops[4] = operands[1];
  vcond_ops[5] = operands[2];
  expanded = ix86_expand_int_vcond (vcond_ops);
  gcc_assert (expanded);
  DONE;
})
5389
;; Signed minimum for V2DI, requiring TARGET_SSE4_2.  Always expanded as
;; a vector conditional: (op1 > op2) ? op2 : op1.
(define_expand "sminv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE4_2"
{
  /* Same comparison as smaxv2di3, with the selected values swapped.  */
  rtx vcond_ops[6];
  bool expanded;

  vcond_ops[0] = operands[0];
  vcond_ops[1] = operands[2];
  vcond_ops[2] = operands[1];
  vcond_ops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
  vcond_ops[4] = operands[1];
  vcond_ops[5] = operands[2];
  expanded = ix86_expand_int_vcond (vcond_ops);
  gcc_assert (expanded);
  DONE;
})
5409
;; Unsigned max/min (umaxmin code iterator) for the VI24_128 modes,
;; available with TARGET_SSE4_1.  Printed as p<maxmin_int><ssemodesuffix>
;; ("noavx", operand 1 tied to operand 0) or the vp... three-operand form
;; ("avx").  Operands 1 and 2 are commutative.
5410 (define_insn "*sse4_1_<code><mode>3"
5411 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
5412 (umaxmin:VI24_128
5413 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
5414 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
5415 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5416 "@
5417 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5418 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5419 [(set_attr "isa" "noavx,avx")
5420 (set_attr "type" "sseiadd")
5421 (set_attr "prefix_extra" "1,*")
5422 (set_attr "prefix" "orig,vex")
5423 (set_attr "mode" "TI")])
5424
;; Unsigned max/min (umaxmin code iterator) for V16QI, available with
;; plain TARGET_SSE2.  Printed as p<maxmin_int>b ("noavx", operand 1 tied
;; to operand 0) or vp<maxmin_int>b ("avx").  Operands 1 and 2 are
;; commutative.
;; NOTE(review): as in *<code>v8hi3, prefix_extra is "*,1" (VEX
;; alternative only) - confirm that this is intended.
5425 (define_insn "*<code>v16qi3"
5426 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5427 (umaxmin:V16QI
5428 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
5429 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
5430 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
5431 "@
5432 p<maxmin_int>b\t{%2, %0|%0, %2}
5433 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
5434 [(set_attr "isa" "noavx,avx")
5435 (set_attr "type" "sseiadd")
5436 (set_attr "prefix_data16" "1,*")
5437 (set_attr "prefix_extra" "*,1")
5438 (set_attr "prefix" "orig,vex")
5439 (set_attr "mode" "TI")])
5440
;; Named expander for unsigned max/min (umaxmin code iterator) on V16QI,
;; enabled by TARGET_SSE2.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy and the pattern is then matched by
;; the *<code>v16qi3 insn.
5441 (define_expand "<code>v16qi3"
5442 [(set (match_operand:V16QI 0 "register_operand" "")
5443 (umaxmin:V16QI
5444 (match_operand:V16QI 1 "nonimmediate_operand" "")
5445 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
5446 "TARGET_SSE2"
5447 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
5448
;; Unsigned maximum for V8HI.  With SSE4.1 the umax pattern is emitted
;; directly after operand fix-up.  Without it the result is computed as
;; ussub (op1, op2) + op2 (sse2_ussubv8hi3 is presumably the unsigned
;; saturating subtract - defined elsewhere in the file).
(define_expand "umaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
                   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      rtx dest = operands[0];
      rtx rhs = operands[2];
      rtx diff = dest;

      /* Operand 2 is read again by the add, so the difference must not
         overwrite it when the destination is that same rtx.  */
      if (rtx_equal_p (diff, rhs))
        diff = gen_reg_rtx (V8HImode);

      emit_insn (gen_sse2_ussubv8hi3 (diff, operands[1], rhs));
      emit_insn (gen_addv8hi3 (dest, diff, rhs));
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
})
5467
;; Unsigned maximum for V4SI.  With SSE4.1 the umax pattern is emitted
;; directly after operand fix-up.  Without it the maximum is built as a
;; vector conditional on an unsigned compare: (op1 >u op2) ? op1 : op2.
(define_expand "umaxv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      /* Layout: destination, value-if-true, value-if-false, comparison,
         compared operands.  */
      rtx vcond_ops[6];
      bool expanded;

      vcond_ops[0] = operands[0];
      vcond_ops[1] = operands[1];
      vcond_ops[2] = operands[2];
      vcond_ops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
      vcond_ops[4] = operands[1];
      vcond_ops[5] = operands[2];
      expanded = ix86_expand_int_vcond (vcond_ops);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
})
5492
;; Unsigned minimum for the VI24_128 modes.  With SSE4.1 the umin pattern
;; is emitted directly after operand fix-up.  Without it the minimum is
;; built as a vector conditional on an unsigned compare:
;; (op1 >u op2) ? op2 : op1.
(define_expand "umin<mode>3"
  [(set (match_operand:VI24_128 0 "register_operand" "")
        (umin:VI24_128 (match_operand:VI24_128 1 "register_operand" "")
                       (match_operand:VI24_128 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1)
    {
      /* Same comparison as the unsigned max, selected values swapped.  */
      rtx vcond_ops[6];
      bool expanded;

      vcond_ops[0] = operands[0];
      vcond_ops[1] = operands[2];
      vcond_ops[2] = operands[1];
      vcond_ops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
      vcond_ops[4] = operands[1];
      vcond_ops[5] = operands[2];
      expanded = ix86_expand_int_vcond (vcond_ops);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
})
5517
;; Unsigned maximum for V2DI, requiring TARGET_SSE4_2.  Always expanded
;; as a vector conditional on an unsigned compare:
;; (op1 >u op2) ? op1 : op2.
(define_expand "umaxv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE4_2"
{
  /* Layout: destination, value-if-true, value-if-false, comparison,
     compared operands.  */
  rtx vcond_ops[6];
  bool expanded;

  vcond_ops[0] = operands[0];
  vcond_ops[1] = operands[1];
  vcond_ops[2] = operands[2];
  vcond_ops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
  vcond_ops[4] = operands[1];
  vcond_ops[5] = operands[2];
  expanded = ix86_expand_int_vcond (vcond_ops);
  gcc_assert (expanded);
  DONE;
})
5537
;; Unsigned minimum for V2DI, requiring TARGET_SSE4_2.  Always expanded
;; as a vector conditional on an unsigned compare:
;; (op1 >u op2) ? op2 : op1.
(define_expand "uminv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE4_2"
{
  /* Same comparison as umaxv2di3, with the selected values swapped.  */
  rtx vcond_ops[6];
  bool expanded;

  vcond_ops[0] = operands[0];
  vcond_ops[1] = operands[2];
  vcond_ops[2] = operands[1];
  vcond_ops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
  vcond_ops[4] = operands[1];
  vcond_ops[5] = operands[2];
  expanded = ix86_expand_int_vcond (vcond_ops);
  gcc_assert (expanded);
  DONE;
})
5557
5558 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5559 ;;
5560 ;; Parallel integral comparisons
5561 ;;
5562 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5563
;; Element-wise equality compare for V2DI, available with TARGET_SSE4_1.
;; Printed as pcmpeqq ("noavx", operand 1 tied to operand 0) or vpcmpeqq
;; ("avx").  Operands 1 and 2 are commutative.
5564 (define_insn "*sse4_1_eqv2di3"
5565 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5566 (eq:V2DI
5567 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
5568 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
5569 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
5570 "@
5571 pcmpeqq\t{%2, %0|%0, %2}
5572 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
5573 [(set_attr "isa" "noavx,avx")
5574 (set_attr "type" "ssecmp")
5575 (set_attr "prefix_extra" "1")
5576 (set_attr "prefix" "orig,vex")
5577 (set_attr "mode" "TI")])
5578
;; Element-wise equality compare for the VI124_128 modes.  Enabled for
;; TARGET_SSE2 but explicitly disabled when TARGET_XOP is set.  Printed as
;; pcmpeq<ssemodesuffix> ("noavx", operand 1 tied to operand 0) or
;; vpcmpeq<ssemodesuffix> ("avx").  Operands 1 and 2 are commutative.
5579 (define_insn "*sse2_eq<mode>3"
5580 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
5581 (eq:VI124_128
5582 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
5583 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
5584 "TARGET_SSE2 && !TARGET_XOP
5585 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5586 "@
5587 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
5588 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5589 [(set_attr "isa" "noavx,avx")
5590 (set_attr "type" "ssecmp")
5591 (set_attr "prefix_data16" "1,*")
5592 (set_attr "prefix" "orig,vex")
5593 (set_attr "mode" "TI")])
5594
;; Named expander for the VI124_128 equality compare (TARGET_SSE2 and not
;; TARGET_XOP).  The operands are passed through
;; ix86_fixup_binary_operands_no_copy and the pattern is then matched by
;; *sse2_eq<mode>3.
5595 (define_expand "sse2_eq<mode>3"
5596 [(set (match_operand:VI124_128 0 "register_operand" "")
5597 (eq:VI124_128
5598 (match_operand:VI124_128 1 "nonimmediate_operand" "")
5599 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
5600 "TARGET_SSE2 && !TARGET_XOP "
5601 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
5602
;; Named expander for the V2DI equality compare (TARGET_SSE4_1).  The
;; operands are passed through ix86_fixup_binary_operands_no_copy and the
;; pattern is then matched by *sse4_1_eqv2di3.
5603 (define_expand "sse4_1_eqv2di3"
5604 [(set (match_operand:V2DI 0 "register_operand" "")
5605 (eq:V2DI
5606 (match_operand:V2DI 1 "nonimmediate_operand" "")
5607 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
5608 "TARGET_SSE4_1"
5609 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
5610
;; Element-wise signed greater-than compare for V2DI, available with
;; TARGET_SSE4_2.  Operand 1 must be a register and carries no "%"
;; modifier, so the operands are not treated as commutative.  Printed as
;; pcmpgtq ("noavx", operand 1 tied to operand 0) or vpcmpgtq ("avx").
5611 (define_insn "sse4_2_gtv2di3"
5612 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5613 (gt:V2DI
5614 (match_operand:V2DI 1 "register_operand" "0,x")
5615 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
5616 "TARGET_SSE4_2"
5617 "@
5618 pcmpgtq\t{%2, %0|%0, %2}
5619 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
5620 [(set_attr "isa" "noavx,avx")
5621 (set_attr "type" "ssecmp")
5622 (set_attr "prefix_extra" "1")
5623 (set_attr "prefix" "orig,vex")
5624 (set_attr "mode" "TI")])
5625
;; Element-wise signed greater-than compare for the VI124_128 modes.
;; Enabled for TARGET_SSE2 but disabled when TARGET_XOP is set.  Operand 1
;; must be a register and carries no "%" modifier, so the operands are
;; not treated as commutative.  Printed as pcmpgt<ssemodesuffix> ("noavx",
;; operand 1 tied to operand 0) or vpcmpgt<ssemodesuffix> ("avx").
5626 (define_insn "sse2_gt<mode>3"
5627 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
5628 (gt:VI124_128
5629 (match_operand:VI124_128 1 "register_operand" "0,x")
5630 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
5631 "TARGET_SSE2 && !TARGET_XOP"
5632 "@
5633 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
5634 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5635 [(set_attr "isa" "noavx,avx")
5636 (set_attr "type" "ssecmp")
5637 (set_attr "prefix_data16" "1,*")
5638 (set_attr "prefix" "orig,vex")
5639 (set_attr "mode" "TI")])
5640
;; Vector conditional select for the VI124_128 modes (SSE2).
;; Operand 3 is the comparison operator applied to operands 4 and 5;
;; operands 1 and 2 are the two arms of the if_then_else.
;; All code generation is delegated to ix86_expand_int_vcond, which is
;; asserted to succeed; the RTL template itself is never emitted (DONE).
5641 (define_expand "vcond<mode>"
5642 [(set (match_operand:VI124_128 0 "register_operand" "")
5643 (if_then_else:VI124_128
5644 (match_operator 3 ""
5645 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
5646 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
5647 (match_operand:VI124_128 1 "general_operand" "")
5648 (match_operand:VI124_128 2 "general_operand" "")))]
5649 "TARGET_SSE2"
5650 {
5651 bool ok = ix86_expand_int_vcond (operands);
5652 gcc_assert (ok);
5653 DONE;
5654 })
5655
;; Vector conditional select for V2DI, enabled only with SSE4.2.
;; Operand 3 is the comparison operator applied to operands 4 and 5;
;; operands 1 and 2 are the two arms of the if_then_else.
;; All code generation is delegated to ix86_expand_int_vcond, which is
;; asserted to succeed; the RTL template itself is never emitted (DONE).
5656 (define_expand "vcondv2di"
5657 [(set (match_operand:V2DI 0 "register_operand" "")
5658 (if_then_else:V2DI
5659 (match_operator 3 ""
5660 [(match_operand:V2DI 4 "nonimmediate_operand" "")
5661 (match_operand:V2DI 5 "nonimmediate_operand" "")])
5662 (match_operand:V2DI 1 "general_operand" "")
5663 (match_operand:V2DI 2 "general_operand" "")))]
5664 "TARGET_SSE4_2"
5665 {
5666 bool ok = ix86_expand_int_vcond (operands);
5667 gcc_assert (ok);
5668 DONE;
5669 })
5670
;; "vcondu" counterpart of the vcond expander for the VI124_128 modes
;; (SSE2).  The operand layout is the same: operand 3 is the comparison
;; operator over operands 4 and 5, operands 1 and 2 are the selected
;; values.  Expansion is delegated to the same helper,
;; ix86_expand_int_vcond, which is asserted to succeed.
5671 (define_expand "vcondu<mode>"
5672 [(set (match_operand:VI124_128 0 "register_operand" "")
5673 (if_then_else:VI124_128
5674 (match_operator 3 ""
5675 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
5676 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
5677 (match_operand:VI124_128 1 "general_operand" "")
5678 (match_operand:VI124_128 2 "general_operand" "")))]
5679 "TARGET_SSE2"
5680 {
5681 bool ok = ix86_expand_int_vcond (operands);
5682 gcc_assert (ok);
5683 DONE;
5684 })
5685
;; "vcondu" counterpart of the vcondv2di expander, enabled only with
;; SSE4.2.  Operand 3 is the comparison operator over operands 4 and 5;
;; operands 1 and 2 are the selected values.  Expansion is delegated to
;; ix86_expand_int_vcond, which is asserted to succeed.
5686 (define_expand "vconduv2di"
5687 [(set (match_operand:V2DI 0 "register_operand" "")
5688 (if_then_else:V2DI
5689 (match_operator 3 ""
5690 [(match_operand:V2DI 4 "nonimmediate_operand" "")
5691 (match_operand:V2DI 5 "nonimmediate_operand" "")])
5692 (match_operand:V2DI 1 "general_operand" "")
5693 (match_operand:V2DI 2 "general_operand" "")))]
5694 "TARGET_SSE4_2"
5695 {
5696 bool ok = ix86_expand_int_vcond (operands);
5697 gcc_assert (ok);
5698 DONE;
5699 })
5700
5701 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5702 ;;
5703 ;; Parallel bitwise logical operations
5704 ;;
5705 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5706
;; One's complement of a VI vector, expressed as XOR with all-ones.
;; The preparation code builds a CONST_VECTOR whose every element is
;; constm1_rtx (one element per GET_MODE_NUNITS of the mode), forces it
;; into a register, and installs it as operand 2 for the (match_dup 2).
5707 (define_expand "one_cmpl<mode>2"
5708 [(set (match_operand:VI 0 "register_operand" "")
5709 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
5710 (match_dup 2)))]
5711 "TARGET_SSE"
5712 {
5713 int i, n = GET_MODE_NUNITS (<MODE>mode);
5714 rtvec v = rtvec_alloc (n);
5715
5716 for (i = 0; i < n; ++i)
5717 RTVEC_ELT (v, i) = constm1_rtx;
5718
5719 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
5720 })
5721
;; Named expander for (~operand 1) & operand 2 on the VI_128 modes under
;; SSE2.  It has no preparation code: the RTL template is emitted as
;; written.  Operand 1 (the complemented input) must be a register.
5722 (define_expand "sse2_andnot<mode>3"
5723 [(set (match_operand:VI_128 0 "register_operand" "")
5724 (and:VI_128
5725 (not:VI_128 (match_operand:VI_128 1 "register_operand" ""))
5726 (match_operand:VI_128 2 "nonimmediate_operand" "")))]
5727 "TARGET_SSE2")
5728
;; (~operand 1) & operand 2 for every VI mode.
;; The mnemonic is chosen from the insn's "mode" attribute: MODE_TI
;; selects the integer form "pandn", anything else selects "andnps".
;; Alternative 0 prints the two-operand form (operand 1 tied to the
;; destination); alternative 1 prefixes "v" and prints the three-operand
;; VEX form.  The text is formatted into a static buffer with snprintf.
;;
;; The "mode" attribute is computed as:
;;   operand wider than 16 bytes -> V8SF
;;   otherwise, TARGET_SSE2      -> TI
;;   otherwise                   -> V4SF
;; GET_MODE_SIZE is measured in bytes, so the width test must compare
;; against 16 (one 128-bit register).  The previous "> 128" could never
;; be true, so operands wider than 16 bytes were classified as TI and
;; printed with the integer mnemonic instead of the "ps" one.
5729 (define_insn "*andnot<mode>3"
5730 [(set (match_operand:VI 0 "register_operand" "=x,x")
5731 (and:VI
5732 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
5733 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
5734 "TARGET_SSE"
5735 {
5736 static char buf[32];
5737 const char *ops;
5738 const char *tmp
5739 = (get_attr_mode (insn) == MODE_TI) ? "pandn" : "andnps";
5740
5741 switch (which_alternative)
5742 {
5743 case 0:
5744 ops = "%s\t{%%2, %%0|%%0, %%2}";
5745 break;
5746 case 1:
5747 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
5748 break;
5749 default:
5750 gcc_unreachable ();
5751 }
5752
5753 snprintf (buf, sizeof (buf), ops, tmp);
5754 return buf;
5755 }
5756 [(set_attr "isa" "noavx,avx")
5757 (set_attr "type" "sselog")
5758 (set (attr "prefix_data16")
5759 (if_then_else
5760 (and (eq_attr "alternative" "0")
5761 (eq_attr "mode" "TI"))
5762 (const_string "1")
5763 (const_string "*")))
5764 (set_attr "prefix" "orig,vex")
5765 (set (attr "mode")
5766 (cond [(ne (symbol_ref "GET_MODE_SIZE (<MODE>mode) > 16") (const_int 0))
5767 (const_string "V8SF")
5768 (ne (symbol_ref "TARGET_SSE2") (const_int 0))
5769 (const_string "TI")
5770 ]
5771 (const_string "V4SF")))])
5772
;; Named expanders for the any_logic operations on every VI mode (SSE).
;; The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy before the set is emitted.
5773 (define_expand "<code><mode>3"
5774 [(set (match_operand:VI 0 "register_operand" "")
5775 (any_logic:VI
5776 (match_operand:VI 1 "nonimmediate_operand" "")
5777 (match_operand:VI 2 "nonimmediate_operand" "")))]
5778 "TARGET_SSE"
5779 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
5780
;; The any_logic operations on every VI mode, matched only when
;; ix86_binary_operator_ok accepts the operands.  The "%" on operand 1
;; marks operands 1 and 2 as commutative.
;; The mnemonic is chosen from the insn's "mode" attribute: MODE_TI
;; selects the integer form "p<logic>", anything else "<logic>ps".
;; Alternative 0 prints the two-operand form (operand 1 tied to the
;; destination); alternative 1 prefixes "v" and prints the three-operand
;; VEX form.  The text is formatted into a static buffer with snprintf.
;;
;; The "mode" attribute is computed as:
;;   operand wider than 16 bytes -> V8SF
;;   otherwise, TARGET_SSE2      -> TI
;;   otherwise                   -> V4SF
;; GET_MODE_SIZE is measured in bytes, so the width test must compare
;; against 16 (one 128-bit register).  The previous "> 128" could never
;; be true, so operands wider than 16 bytes were classified as TI and
;; printed with the integer mnemonic instead of the "ps" one.
5781 (define_insn "*<code><mode>3"
5782 [(set (match_operand:VI 0 "register_operand" "=x,x")
5783 (any_logic:VI
5784 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
5785 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
5786 "TARGET_SSE
5787 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5788 {
5789 static char buf[32];
5790 const char *ops;
5791 const char *tmp
5792 = (get_attr_mode (insn) == MODE_TI) ? "p<logic>" : "<logic>ps";
5793
5794 switch (which_alternative)
5795 {
5796 case 0:
5797 ops = "%s\t{%%2, %%0|%%0, %%2}";
5798 break;
5799 case 1:
5800 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
5801 break;
5802 default:
5803 gcc_unreachable ();
5804 }
5805
5806 snprintf (buf, sizeof (buf), ops, tmp);
5807 return buf;
5808 }
5809 [(set_attr "isa" "noavx,avx")
5810 (set_attr "type" "sselog")
5811 (set (attr "prefix_data16")
5812 (if_then_else
5813 (and (eq_attr "alternative" "0")
5814 (eq_attr "mode" "TI"))
5815 (const_string "1")
5816 (const_string "*")))
5817 (set_attr "prefix" "orig,vex")
5818 (set (attr "mode")
5819 (cond [(ne (symbol_ref "GET_MODE_SIZE (<MODE>mode) > 16") (const_int 0))
5820 (const_string "V8SF")
5821 (ne (symbol_ref "TARGET_SSE2") (const_int 0))
5822 (const_string "TI")
5823 ]
5824 (const_string "V4SF")))])
5825
;; (~operand 1) & operand 2 on a TFmode value held in an SSE register
;; (SSE2).  Alternative 0 is the two-operand pandn form (operand 1 tied
;; to the destination); alternative 1 is the three-operand VEX vpandn.
5826 (define_insn "*andnottf3"
5827 [(set (match_operand:TF 0 "register_operand" "=x,x")
5828 (and:TF
5829 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
5830 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
5831 "TARGET_SSE2"
5832 "@
5833 pandn\t{%2, %0|%0, %2}
5834 vpandn\t{%2, %1, %0|%0, %1, %2}"
5835 [(set_attr "isa" "noavx,avx")
5836 (set_attr "type" "sselog")
5837 (set_attr "prefix_data16" "1,*")
5838 (set_attr "prefix" "orig,vex")
5839 (set_attr "mode" "TI")])
5840
;; Named expanders for the any_logic operations on TFmode (SSE2).
;; The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy before the set is emitted.
5841 (define_expand "<code>tf3"
5842 [(set (match_operand:TF 0 "register_operand" "")
5843 (any_logic:TF
5844 (match_operand:TF 1 "nonimmediate_operand" "")
5845 (match_operand:TF 2 "nonimmediate_operand" "")))]
5846 "TARGET_SSE2"
5847 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
5848
;; The any_logic operations on TFmode values in SSE registers (SSE2),
;; matched only when ix86_binary_operator_ok accepts the operands.
;; The "%" on operand 1 marks operands 1 and 2 as commutative.
;; Alternative 0 is the two-operand p<logic> form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX vp<logic> form.
5849 (define_insn "*<code>tf3"
5850 [(set (match_operand:TF 0 "register_operand" "=x,x")
5851 (any_logic:TF
5852 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
5853 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
5854 "TARGET_SSE2
5855 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
5856 "@
5857 p<logic>\t{%2, %0|%0, %2}
5858 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
5859 [(set_attr "isa" "noavx,avx")
5860 (set_attr "type" "sselog")
5861 (set_attr "prefix_data16" "1,*")
5862 (set_attr "prefix" "orig,vex")
5863 (set_attr "mode" "TI")])
5864
5865 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5866 ;;
5867 ;; Parallel integral element swizzling
5868 ;;
5869 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5870
;; Pack-with-truncation expander for the VI248_128 modes (SSE2).
;; Both inputs are reinterpreted via gen_lowpart in the narrower-element
;; <ssepackmode> and handed to ix86_expand_vec_extract_even_odd with a
;; final argument of 0.
;; NOTE(review): the 0 presumably selects the even-indexed elements,
;; i.e. the low half of each wide element -- confirm against the helper.
5871 (define_expand "vec_pack_trunc_<mode>"
5872 [(match_operand:<ssepackmode> 0 "register_operand" "")
5873 (match_operand:VI248_128 1 "register_operand" "")
5874 (match_operand:VI248_128 2 "register_operand" "")]
5875 "TARGET_SSE2"
5876 {
5877 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
5878 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
5879 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
5880 DONE;
5881 })
5882
;; Pack two V8HI vectors into one V16QI using signed saturating
;; truncation (ss_truncate): operand 1 supplies the low eight bytes,
;; operand 2 the high eight.  Alternative 0 is the two-operand packsswb
;; form (operand 1 tied to the destination); alternative 1 is the
;; three-operand VEX vpacksswb form.
5883 (define_insn "sse2_packsswb"
5884 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5885 (vec_concat:V16QI
5886 (ss_truncate:V8QI
5887 (match_operand:V8HI 1 "register_operand" "0,x"))
5888 (ss_truncate:V8QI
5889 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))))]
5890 "TARGET_SSE2"
5891 "@
5892 packsswb\t{%2, %0|%0, %2}
5893 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
5894 [(set_attr "isa" "noavx,avx")
5895 (set_attr "type" "sselog")
5896 (set_attr "prefix_data16" "1,*")
5897 (set_attr "prefix" "orig,vex")
5898 (set_attr "mode" "TI")])
5899
;; Pack two V4SI vectors into one V8HI using signed saturating
;; truncation (ss_truncate): operand 1 supplies the low four halfwords,
;; operand 2 the high four.  Alternative 0 is the two-operand packssdw
;; form (operand 1 tied to the destination); alternative 1 is the
;; three-operand VEX vpackssdw form.
5900 (define_insn "sse2_packssdw"
5901 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5902 (vec_concat:V8HI
5903 (ss_truncate:V4HI
5904 (match_operand:V4SI 1 "register_operand" "0,x"))
5905 (ss_truncate:V4HI
5906 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
5907 "TARGET_SSE2"
5908 "@
5909 packssdw\t{%2, %0|%0, %2}
5910 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
5911 [(set_attr "isa" "noavx,avx")
5912 (set_attr "type" "sselog")
5913 (set_attr "prefix_data16" "1,*")
5914 (set_attr "prefix" "orig,vex")
5915 (set_attr "mode" "TI")])
5916
;; Pack two V8HI vectors into one V16QI using unsigned saturating
;; truncation (us_truncate): operand 1 supplies the low eight bytes,
;; operand 2 the high eight.  Alternative 0 is the two-operand packuswb
;; form (operand 1 tied to the destination); alternative 1 is the
;; three-operand VEX vpackuswb form.
5917 (define_insn "sse2_packuswb"
5918 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5919 (vec_concat:V16QI
5920 (us_truncate:V8QI
5921 (match_operand:V8HI 1 "register_operand" "0,x"))
5922 (us_truncate:V8QI
5923 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))))]
5924 "TARGET_SSE2"
5925 "@
5926 packuswb\t{%2, %0|%0, %2}
5927 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
5928 [(set_attr "isa" "noavx,avx")
5929 (set_attr "type" "sselog")
5930 (set_attr "prefix_data16" "1,*")
5931 (set_attr "prefix" "orig,vex")
5932 (set_attr "mode" "TI")])
5933
;; Interleave the high halves of two V16QI vectors (SSE2).
;; In the 32-element concatenation, indices 8..15 are the high bytes of
;; operand 1 and 24..31 the high bytes of operand 2; the selection
;; alternates between them.  Alternative 0 is the two-operand punpckhbw
;; form (operand 1 tied to the destination); alternative 1 is the
;; three-operand VEX vpunpckhbw form.
5934 (define_insn "vec_interleave_highv16qi"
5935 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5936 (vec_select:V16QI
5937 (vec_concat:V32QI
5938 (match_operand:V16QI 1 "register_operand" "0,x")
5939 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
5940 (parallel [(const_int 8) (const_int 24)
5941 (const_int 9) (const_int 25)
5942 (const_int 10) (const_int 26)
5943 (const_int 11) (const_int 27)
5944 (const_int 12) (const_int 28)
5945 (const_int 13) (const_int 29)
5946 (const_int 14) (const_int 30)
5947 (const_int 15) (const_int 31)])))]
5948 "TARGET_SSE2"
5949 "@
5950 punpckhbw\t{%2, %0|%0, %2}
5951 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
5952 [(set_attr "isa" "noavx,avx")
5953 (set_attr "type" "sselog")
5954 (set_attr "prefix_data16" "1,*")
5955 (set_attr "prefix" "orig,vex")
5956 (set_attr "mode" "TI")])
5957
;; Interleave the low halves of two V16QI vectors (SSE2).
;; In the 32-element concatenation, indices 0..7 are the low bytes of
;; operand 1 and 16..23 the low bytes of operand 2; the selection
;; alternates between them.  Alternative 0 is the two-operand punpcklbw
;; form (operand 1 tied to the destination); alternative 1 is the
;; three-operand VEX vpunpcklbw form.
5958 (define_insn "vec_interleave_lowv16qi"
5959 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5960 (vec_select:V16QI
5961 (vec_concat:V32QI
5962 (match_operand:V16QI 1 "register_operand" "0,x")
5963 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
5964 (parallel [(const_int 0) (const_int 16)
5965 (const_int 1) (const_int 17)
5966 (const_int 2) (const_int 18)
5967 (const_int 3) (const_int 19)
5968 (const_int 4) (const_int 20)
5969 (const_int 5) (const_int 21)
5970 (const_int 6) (const_int 22)
5971 (const_int 7) (const_int 23)])))]
5972 "TARGET_SSE2"
5973 "@
5974 punpcklbw\t{%2, %0|%0, %2}
5975 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
5976 [(set_attr "isa" "noavx,avx")
5977 (set_attr "type" "sselog")
5978 (set_attr "prefix_data16" "1,*")
5979 (set_attr "prefix" "orig,vex")
5980 (set_attr "mode" "TI")])
5981
;; Interleave the high halves of two V8HI vectors (SSE2).
;; In the 16-element concatenation, indices 4..7 come from operand 1 and
;; 12..15 from operand 2; the selection alternates between them.
;; Alternative 0 is the two-operand punpckhwd form (operand 1 tied to
;; the destination); alternative 1 is the three-operand VEX vpunpckhwd.
5982 (define_insn "vec_interleave_highv8hi"
5983 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5984 (vec_select:V8HI
5985 (vec_concat:V16HI
5986 (match_operand:V8HI 1 "register_operand" "0,x")
5987 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
5988 (parallel [(const_int 4) (const_int 12)
5989 (const_int 5) (const_int 13)
5990 (const_int 6) (const_int 14)
5991 (const_int 7) (const_int 15)])))]
5992 "TARGET_SSE2"
5993 "@
5994 punpckhwd\t{%2, %0|%0, %2}
5995 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
5996 [(set_attr "isa" "noavx,avx")
5997 (set_attr "type" "sselog")
5998 (set_attr "prefix_data16" "1,*")
5999 (set_attr "prefix" "orig,vex")
6000 (set_attr "mode" "TI")])
6001
;; Interleave the low halves of two V8HI vectors (SSE2).
;; In the 16-element concatenation, indices 0..3 come from operand 1 and
;; 8..11 from operand 2; the selection alternates between them.
;; Alternative 0 is the two-operand punpcklwd form (operand 1 tied to
;; the destination); alternative 1 is the three-operand VEX vpunpcklwd.
6002 (define_insn "vec_interleave_lowv8hi"
6003 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6004 (vec_select:V8HI
6005 (vec_concat:V16HI
6006 (match_operand:V8HI 1 "register_operand" "0,x")
6007 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6008 (parallel [(const_int 0) (const_int 8)
6009 (const_int 1) (const_int 9)
6010 (const_int 2) (const_int 10)
6011 (const_int 3) (const_int 11)])))]
6012 "TARGET_SSE2"
6013 "@
6014 punpcklwd\t{%2, %0|%0, %2}
6015 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6016 [(set_attr "isa" "noavx,avx")
6017 (set_attr "type" "sselog")
6018 (set_attr "prefix_data16" "1,*")
6019 (set_attr "prefix" "orig,vex")
6020 (set_attr "mode" "TI")])
6021
;; Interleave the high halves of two V4SI vectors (SSE2).
;; In the 8-element concatenation, indices 2..3 come from operand 1 and
;; 6..7 from operand 2; the selection alternates between them.
;; Alternative 0 is the two-operand punpckhdq form (operand 1 tied to
;; the destination); alternative 1 is the three-operand VEX vpunpckhdq.
6022 (define_insn "vec_interleave_highv4si"
6023 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6024 (vec_select:V4SI
6025 (vec_concat:V8SI
6026 (match_operand:V4SI 1 "register_operand" "0,x")
6027 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6028 (parallel [(const_int 2) (const_int 6)
6029 (const_int 3) (const_int 7)])))]
6030 "TARGET_SSE2"
6031 "@
6032 punpckhdq\t{%2, %0|%0, %2}
6033 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6034 [(set_attr "isa" "noavx,avx")
6035 (set_attr "type" "sselog")
6036 (set_attr "prefix_data16" "1,*")
6037 (set_attr "prefix" "orig,vex")
6038 (set_attr "mode" "TI")])
6039
;; Interleave the low halves of two V4SI vectors (SSE2).
;; In the 8-element concatenation, indices 0..1 come from operand 1 and
;; 4..5 from operand 2; the selection alternates between them.
;; Alternative 0 is the two-operand punpckldq form (operand 1 tied to
;; the destination); alternative 1 is the three-operand VEX vpunpckldq.
6040 (define_insn "vec_interleave_lowv4si"
6041 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6042 (vec_select:V4SI
6043 (vec_concat:V8SI
6044 (match_operand:V4SI 1 "register_operand" "0,x")
6045 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6046 (parallel [(const_int 0) (const_int 4)
6047 (const_int 1) (const_int 5)])))]
6048 "TARGET_SSE2"
6049 "@
6050 punpckldq\t{%2, %0|%0, %2}
6051 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6052 [(set_attr "isa" "noavx,avx")
6053 (set_attr "type" "sselog")
6054 (set_attr "prefix_data16" "1,*")
6055 (set_attr "prefix" "orig,vex")
6056 (set_attr "mode" "TI")])
6057
;; Insert a QImode scalar (operand 2) into one byte of a V16QI vector
;; (operand 1), available with SSE4.1.
;; In RTL, operand 3 is a one-hot vec_merge mask (a power of two from 1
;; to 32768); the output code converts it with exact_log2 into the
;; element index that the instruction takes as its immediate.
;; Alternatives 0/1 are the legacy pinsrb forms with operand 1 tied to
;; the destination (scalar in a general register, printed with %k2, or
;; in memory); alternatives 2/3 are the matching VEX vpinsrb forms.
6058 (define_insn "sse4_1_pinsrb"
6059 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
6060 (vec_merge:V16QI
6061 (vec_duplicate:V16QI
6062 (match_operand:QI 2 "nonimmediate_operand" "r,m,r,m"))
6063 (match_operand:V16QI 1 "register_operand" "0,0,x,x")
6064 (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n,n,n,n")))]
6065 "TARGET_SSE4_1"
6066 {
6067 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6068
6069 switch (which_alternative)
6070 {
6071 case 0:
6072 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6073 case 1:
6074 return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
6075 case 2:
6076 return "vpinsrb\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6077 case 3:
6078 return "vpinsrb\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6079 default:
6080 gcc_unreachable ();
6081 }
6082 }
6083 [(set_attr "isa" "noavx,noavx,avx,avx")
6084 (set_attr "type" "sselog")
6085 (set_attr "prefix_extra" "1")
6086 (set_attr "length_immediate" "1")
6087 (set_attr "prefix" "orig,orig,vex,vex")
6088 (set_attr "mode" "TI")])
6089
;; Insert an HImode scalar (operand 2) into one halfword of a V8HI
;; vector (operand 1), available with SSE2.
;; In RTL, operand 3 is a one-hot vec_merge mask (a power of two from 1
;; to 128); the output code converts it with exact_log2 into the element
;; index that the instruction takes as its immediate.
;; Alternatives 0/1 are the legacy pinsrw forms with operand 1 tied to
;; the destination (scalar in a general register, printed with %k2, or
;; in memory); alternatives 2/3 are the matching VEX vpinsrw forms.
6090 (define_insn "sse2_pinsrw"
6091 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
6092 (vec_merge:V8HI
6093 (vec_duplicate:V8HI
6094 (match_operand:HI 2 "nonimmediate_operand" "r,m,r,m"))
6095 (match_operand:V8HI 1 "register_operand" "0,0,x,x")
6096 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n,n,n,n")))]
6097 "TARGET_SSE2"
6098 {
6099 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6100
6101 switch (which_alternative)
6102 {
6103 case 0:
6104 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6105 case 1:
6106 return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
6107 case 2:
6108 return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6109 case 3:
6110 return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6111 default:
6112 gcc_unreachable ();
6113 }
6114 }
6115 [(set_attr "isa" "noavx,noavx,avx,avx")
6116 (set_attr "type" "sselog")
6117 (set_attr "prefix_data16" "1,1,*,*")
6118 (set_attr "prefix_extra" "*,*,1,1")
6119 (set_attr "length_immediate" "1")
6120 (set_attr "prefix" "orig,orig,vex,vex")
6121 (set_attr "mode" "TI")])
6122
6123 ;; It must come before sse2_loadld since it is preferred.
;; Insert an SImode scalar (operand 2, register or memory) into one
;; element of a V4SI vector (operand 1), available with SSE4.1.
;; In RTL, operand 3 is a one-hot vec_merge mask (a power of two from 1
;; to 8); the output code converts it with exact_log2 into the element
;; index that the instruction takes as its immediate.
;; Alternative 0 is the legacy pinsrd form with operand 1 tied to the
;; destination; alternative 1 is the three-operand VEX vpinsrd form.
6124 (define_insn "sse4_1_pinsrd"
6125 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6126 (vec_merge:V4SI
6127 (vec_duplicate:V4SI
6128 (match_operand:SI 2 "nonimmediate_operand" "rm,rm"))
6129 (match_operand:V4SI 1 "register_operand" "0,x")
6130 (match_operand:SI 3 "const_pow2_1_to_8_operand" "n,n")))]
6131 "TARGET_SSE4_1"
6132 {
6133 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6134
6135 switch (which_alternative)
6136 {
6137 case 0:
6138 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6139 case 1:
6140 return "vpinsrd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6141 default:
6142 gcc_unreachable ();
6143 }
6144 }
6145 [(set_attr "isa" "noavx,avx")
6146 (set_attr "type" "sselog")
6147 (set_attr "prefix_extra" "1")
6148 (set_attr "length_immediate" "1")
6149 (set_attr "prefix" "orig,vex")
6150 (set_attr "mode" "TI")])
6151
;; Insert a DImode scalar (operand 2, register or memory) into one
;; element of a V2DI vector (operand 1).  Requires SSE4.1 and a 64-bit
;; target.
;; In RTL, operand 3 is a one-hot vec_merge mask (1 or 2); the output
;; code converts it with exact_log2 into the element index that the
;; instruction takes as its immediate.
;; Alternative 0 is the legacy pinsrq form with operand 1 tied to the
;; destination; alternative 1 is the three-operand VEX vpinsrq form.
6152 (define_insn "sse4_1_pinsrq"
6153 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6154 (vec_merge:V2DI
6155 (vec_duplicate:V2DI
6156 (match_operand:DI 2 "nonimmediate_operand" "rm,rm"))
6157 (match_operand:V2DI 1 "register_operand" "0,x")
6158 (match_operand:SI 3 "const_pow2_1_to_2_operand" "n,n")))]
6159 "TARGET_SSE4_1 && TARGET_64BIT"
6160 {
6161 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6162
6163 switch (which_alternative)
6164 {
6165 case 0:
6166 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
6167 case 1:
6168 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6169 default:
6170 gcc_unreachable ();
6171 }
6172 }
6173 [(set_attr "isa" "noavx,avx")
6174 (set_attr "type" "sselog")
6175 (set_attr "prefix_rex" "1,*")
6176 (set_attr "prefix_extra" "1")
6177 (set_attr "length_immediate" "1")
6178 (set_attr "prefix" "orig,vex")
6179 (set_attr "mode" "TI")])
6180
;; Extract byte number operand 2 (0..15) from a V16QI register and
;; zero-extend it into an SWI48 general register (SSE4.1).
;; The destination is printed with %k0, i.e. by its 32-bit name.  The
;; %v in the template pairs with the maybe_vex prefix attribute, so one
;; template covers both the legacy and the VEX spelling.
6181 (define_insn "*sse4_1_pextrb_<mode>"
6182 [(set (match_operand:SWI48 0 "register_operand" "=r")
6183 (zero_extend:SWI48
6184 (vec_select:QI
6185 (match_operand:V16QI 1 "register_operand" "x")
6186 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6187 "TARGET_SSE4_1"
6188 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
6189 [(set_attr "type" "sselog")
6190 (set_attr "prefix_extra" "1")
6191 (set_attr "length_immediate" "1")
6192 (set_attr "prefix" "maybe_vex")
6193 (set_attr "mode" "TI")])
6194
;; Extract byte number operand 2 (0..15) from a V16QI register and store
;; it directly to a QImode memory location (SSE4.1).  No extension is
;; involved because the destination is exactly one byte wide.
6195 (define_insn "*sse4_1_pextrb_memory"
6196 [(set (match_operand:QI 0 "memory_operand" "=m")
6197 (vec_select:QI
6198 (match_operand:V16QI 1 "register_operand" "x")
6199 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6200 "TARGET_SSE4_1"
6201 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6202 [(set_attr "type" "sselog")
6203 (set_attr "prefix_extra" "1")
6204 (set_attr "length_immediate" "1")
6205 (set_attr "prefix" "maybe_vex")
6206 (set_attr "mode" "TI")])
6207
;; Extract halfword number operand 2 (0..7) from a V8HI register and
;; zero-extend it into an SWI48 general register (SSE2).
;; The destination is printed with %k0, i.e. by its 32-bit name.
6208 (define_insn "*sse2_pextrw_<mode>"
6209 [(set (match_operand:SWI48 0 "register_operand" "=r")
6210 (zero_extend:SWI48
6211 (vec_select:HI
6212 (match_operand:V8HI 1 "register_operand" "x")
6213 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6214 "TARGET_SSE2"
6215 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
6216 [(set_attr "type" "sselog")
6217 (set_attr "prefix_data16" "1")
6218 (set_attr "length_immediate" "1")
6219 (set_attr "prefix" "maybe_vex")
6220 (set_attr "mode" "TI")])
6221
;; Extract halfword number operand 2 (0..7) from a V8HI register and
;; store it directly to an HImode memory location.  Unlike the
;; register-destination pextrw pattern, which needs only SSE2, this
;; memory-destination form requires SSE4.1.
6222 (define_insn "*sse4_1_pextrw_memory"
6223 [(set (match_operand:HI 0 "memory_operand" "=m")
6224 (vec_select:HI
6225 (match_operand:V8HI 1 "register_operand" "x")
6226 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6227 "TARGET_SSE4_1"
6228 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6229 [(set_attr "type" "sselog")
6230 (set_attr "prefix_extra" "1")
6231 (set_attr "length_immediate" "1")
6232 (set_attr "prefix" "maybe_vex")
6233 (set_attr "mode" "TI")])
6234
;; Extract element number operand 2 (0..3) from a V4SI register into an
;; SImode general register or memory location (SSE4.1).
6235 (define_insn "*sse4_1_pextrd"
6236 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6237 (vec_select:SI
6238 (match_operand:V4SI 1 "register_operand" "x")
6239 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6240 "TARGET_SSE4_1"
6241 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6242 [(set_attr "type" "sselog")
6243 (set_attr "prefix_extra" "1")
6244 (set_attr "length_immediate" "1")
6245 (set_attr "prefix" "maybe_vex")
6246 (set_attr "mode" "TI")])
6247
;; Extract element number operand 2 (0..3) from a V4SI register and
;; zero-extend it into a DImode general register.  Requires a 64-bit
;; target and SSE4.1.  The destination is printed with %k0, i.e. by its
;; 32-bit name, so the same pextrd instruction is emitted as for the
;; plain SImode extract.
6248 (define_insn "*sse4_1_pextrd_zext"
6249 [(set (match_operand:DI 0 "register_operand" "=r")
6250 (zero_extend:DI
6251 (vec_select:SI
6252 (match_operand:V4SI 1 "register_operand" "x")
6253 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
6254 "TARGET_64BIT && TARGET_SSE4_1"
6255 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
6256 [(set_attr "type" "sselog")
6257 (set_attr "prefix_extra" "1")
6258 (set_attr "length_immediate" "1")
6259 (set_attr "prefix" "maybe_vex")
6260 (set_attr "mode" "TI")])
6261
6262 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Extract element number operand 2 (0 or 1) from a V2DI register into a
;; DImode general register or memory location.  Requires SSE4.1 and a
;; 64-bit target.
6263 (define_insn "*sse4_1_pextrq"
6264 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6265 (vec_select:DI
6266 (match_operand:V2DI 1 "register_operand" "x")
6267 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6268 "TARGET_SSE4_1 && TARGET_64BIT"
6269 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
6270 [(set_attr "type" "sselog")
6271 (set_attr "prefix_rex" "1")
6272 (set_attr "prefix_extra" "1")
6273 (set_attr "length_immediate" "1")
6274 (set_attr "prefix" "maybe_vex")
6275 (set_attr "mode" "TI")])
6276
;; Expander for pshufd taking the shuffle as a single immediate
;; (operand 2).  It splits the immediate into four 2-bit selectors
;; (bits 0-1, 2-3, 4-5 and 6-7) and emits sse2_pshufd_1, which carries
;; each selector as its own operand.
6277 (define_expand "sse2_pshufd"
6278 [(match_operand:V4SI 0 "register_operand" "")
6279 (match_operand:V4SI 1 "nonimmediate_operand" "")
6280 (match_operand:SI 2 "const_int_operand" "")]
6281 "TARGET_SSE2"
6282 {
6283 int mask = INTVAL (operands[2]);
6284 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
6285 GEN_INT ((mask >> 0) & 3),
6286 GEN_INT ((mask >> 2) & 3),
6287 GEN_INT ((mask >> 4) & 3),
6288 GEN_INT ((mask >> 6) & 3)));
6289 DONE;
6290 })
6291
;; Arbitrary permutation of the four elements of a V4SI vector (SSE2).
;; Operands 2..5 are the source indices (each 0..3) for result elements
;; 0..3.  At output time they are packed, two bits each, into a single
;; immediate that overwrites operands[2] for printing.
6292 (define_insn "sse2_pshufd_1"
6293 [(set (match_operand:V4SI 0 "register_operand" "=x")
6294 (vec_select:V4SI
6295 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
6296 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6297 (match_operand 3 "const_0_to_3_operand" "")
6298 (match_operand 4 "const_0_to_3_operand" "")
6299 (match_operand 5 "const_0_to_3_operand" "")])))]
6300 "TARGET_SSE2"
6301 {
6302 int mask = 0;
6303 mask |= INTVAL (operands[2]) << 0;
6304 mask |= INTVAL (operands[3]) << 2;
6305 mask |= INTVAL (operands[4]) << 4;
6306 mask |= INTVAL (operands[5]) << 6;
6307 operands[2] = GEN_INT (mask);
6308
6309 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
6310 }
6311 [(set_attr "type" "sselog1")
6312 (set_attr "prefix_data16" "1")
6313 (set_attr "prefix" "maybe_vex")
6314 (set_attr "length_immediate" "1")
6315 (set_attr "mode" "TI")])
6316
;; Expander for pshuflw taking the shuffle as a single immediate
;; (operand 2).  It splits the immediate into four 2-bit selectors
;; (bits 0-1, 2-3, 4-5 and 6-7) and emits sse2_pshuflw_1, which carries
;; each selector as its own operand.
6317 (define_expand "sse2_pshuflw"
6318 [(match_operand:V8HI 0 "register_operand" "")
6319 (match_operand:V8HI 1 "nonimmediate_operand" "")
6320 (match_operand:SI 2 "const_int_operand" "")]
6321 "TARGET_SSE2"
6322 {
6323 int mask = INTVAL (operands[2]);
6324 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
6325 GEN_INT ((mask >> 0) & 3),
6326 GEN_INT ((mask >> 2) & 3),
6327 GEN_INT ((mask >> 4) & 3),
6328 GEN_INT ((mask >> 6) & 3)));
6329 DONE;
6330 })
6331
;; Permute the low four halfwords of a V8HI vector while passing the
;; high four (indices 4..7) through unchanged (SSE2).
;; Operands 2..5 are the source indices (each 0..3) for result elements
;; 0..3.  At output time they are packed, two bits each, into a single
;; immediate that overwrites operands[2] for printing.
6332 (define_insn "sse2_pshuflw_1"
6333 [(set (match_operand:V8HI 0 "register_operand" "=x")
6334 (vec_select:V8HI
6335 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6336 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6337 (match_operand 3 "const_0_to_3_operand" "")
6338 (match_operand 4 "const_0_to_3_operand" "")
6339 (match_operand 5 "const_0_to_3_operand" "")
6340 (const_int 4)
6341 (const_int 5)
6342 (const_int 6)
6343 (const_int 7)])))]
6344 "TARGET_SSE2"
6345 {
6346 int mask = 0;
6347 mask |= INTVAL (operands[2]) << 0;
6348 mask |= INTVAL (operands[3]) << 2;
6349 mask |= INTVAL (operands[4]) << 4;
6350 mask |= INTVAL (operands[5]) << 6;
6351 operands[2] = GEN_INT (mask);
6352
6353 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
6354 }
6355 [(set_attr "type" "sselog")
6356 (set_attr "prefix_data16" "0")
6357 (set_attr "prefix_rep" "1")
6358 (set_attr "prefix" "maybe_vex")
6359 (set_attr "length_immediate" "1")
6360 (set_attr "mode" "TI")])
6361
;; Expander for pshufhw taking the shuffle as a single immediate
;; (operand 2).  It splits the immediate into four 2-bit selectors and
;; adds 4 to each, so that they become the absolute element indices
;; 4..7 that sse2_pshufhw_1's const_4_to_7_operand predicates expect.
6362 (define_expand "sse2_pshufhw"
6363 [(match_operand:V8HI 0 "register_operand" "")
6364 (match_operand:V8HI 1 "nonimmediate_operand" "")
6365 (match_operand:SI 2 "const_int_operand" "")]
6366 "TARGET_SSE2"
6367 {
6368 int mask = INTVAL (operands[2]);
6369 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
6370 GEN_INT (((mask >> 0) & 3) + 4),
6371 GEN_INT (((mask >> 2) & 3) + 4),
6372 GEN_INT (((mask >> 4) & 3) + 4),
6373 GEN_INT (((mask >> 6) & 3) + 4)));
6374 DONE;
6375 })
6376
;; Permute the high four halfwords of a V8HI vector while passing the
;; low four (indices 0..3) through unchanged (SSE2).
;; Operands 2..5 are absolute source indices (each 4..7) for result
;; elements 4..7.  At output time 4 is subtracted from each and the
;; results are packed, two bits each, into a single immediate that
;; overwrites operands[2] for printing.
6377 (define_insn "sse2_pshufhw_1"
6378 [(set (match_operand:V8HI 0 "register_operand" "=x")
6379 (vec_select:V8HI
6380 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6381 (parallel [(const_int 0)
6382 (const_int 1)
6383 (const_int 2)
6384 (const_int 3)
6385 (match_operand 2 "const_4_to_7_operand" "")
6386 (match_operand 3 "const_4_to_7_operand" "")
6387 (match_operand 4 "const_4_to_7_operand" "")
6388 (match_operand 5 "const_4_to_7_operand" "")])))]
6389 "TARGET_SSE2"
6390 {
6391 int mask = 0;
6392 mask |= (INTVAL (operands[2]) - 4) << 0;
6393 mask |= (INTVAL (operands[3]) - 4) << 2;
6394 mask |= (INTVAL (operands[4]) - 4) << 4;
6395 mask |= (INTVAL (operands[5]) - 4) << 6;
6396 operands[2] = GEN_INT (mask);
6397
6398 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
6399 }
6400 [(set_attr "type" "sselog")
6401 (set_attr "prefix_rep" "1")
6402 (set_attr "prefix_data16" "0")
6403 (set_attr "prefix" "maybe_vex")
6404 (set_attr "length_immediate" "1")
6405 (set_attr "mode" "TI")])
6406
;; Load an SImode scalar (operand 1) into element 0 of a V4SI register
;; and zero the remaining elements.  The preparation statement sets
;; operand 2 to the V4SI zero constant; the vec_merge mask (const_int 1)
;; takes only element 0 from the duplicated scalar.
;; Note the condition is TARGET_SSE even though the name says sse2.
6407 (define_expand "sse2_loadd"
6408 [(set (match_operand:V4SI 0 "register_operand" "")
6409 (vec_merge:V4SI
6410 (vec_duplicate:V4SI
6411 (match_operand:SI 1 "nonimmediate_operand" ""))
6412 (match_dup 2)
6413 (const_int 1)))]
6414 "TARGET_SSE"
6415 "operands[2] = CONST0_RTX (V4SImode);")
6416
;; Replace element 0 of a V4SI value (operand 1) with an SImode scalar
;; (operand 2); the vec_merge mask (const_int 1) selects only element 0
;; from the duplicated scalar.
;; Alternatives, in order:
;;   0, 1: operand 1 is zero ("C"); movd from memory / a general register
;;   2:    operand 1 is zero; movss from memory
;;   3:    movss from an SSE register, operand 1 tied to the destination
;;   4:    three-operand VEX vmovss from an SSE register
6417 (define_insn "sse2_loadld"
6418 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x,x")
6419 (vec_merge:V4SI
6420 (vec_duplicate:V4SI
6421 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
6422 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
6423 (const_int 1)))]
6424 "TARGET_SSE"
6425 "@
6426 %vmovd\t{%2, %0|%0, %2}
6427 %vmovd\t{%2, %0|%0, %2}
6428 movss\t{%2, %0|%0, %2}
6429 movss\t{%2, %0|%0, %2}
6430 vmovss\t{%2, %1, %0|%0, %1, %2}"
6431 [(set_attr "isa" "base,base,noavx,noavx,avx")
6432 (set_attr "type" "ssemov")
6433 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
6434 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
6435
;; Extract element 0 of a V4SI register into an SImode destination.
;; The output template is "#", so this insn never prints directly and
;; must be split.  After reload it becomes a plain SImode move whose
;; source is the same hard register viewed in SImode.  The split applies
;; when inter-unit moves are enabled, or the destination is memory, or
;; the destination is not a general register.
6436 (define_insn_and_split "sse2_stored"
6437 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
6438 (vec_select:SI
6439 (match_operand:V4SI 1 "register_operand" "x,Yi")
6440 (parallel [(const_int 0)])))]
6441 "TARGET_SSE"
6442 "#"
6443 "&& reload_completed
6444 && (TARGET_INTER_UNIT_MOVES
6445 || MEM_P (operands [0])
6446 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6447 [(set (match_dup 0) (match_dup 1))]
6448 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
6449
;; Extract element operand 2 (0..3) of a V4SI value that lives in
;; offsettable memory into a general register.  The insn has an empty
;; condition and a "#" template; after reload it is split into an
;; ordinary SImode move from the vector's address plus 4 * index bytes.
6450 (define_insn_and_split "*vec_ext_v4si_mem"
6451 [(set (match_operand:SI 0 "register_operand" "=r")
6452 (vec_select:SI
6453 (match_operand:V4SI 1 "memory_operand" "o")
6454 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
6455 ""
6456 "#"
6457 "reload_completed"
6458 [(const_int 0)]
6459 {
6460 int i = INTVAL (operands[2]);
6461
6462 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
6463 DONE;
6464 })
6465
;; Named expander that extracts element 0 of a V2DI register into a
;; DImode destination (SSE).  It has no preparation code: the RTL
;; template is emitted as written.
6466 (define_expand "sse_storeq"
6467 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6468 (vec_select:DI
6469 (match_operand:V2DI 1 "register_operand" "")
6470 (parallel [(const_int 0)])))]
6471 "TARGET_SSE")
6472
;; 64-bit-target form of "extract element 0 of V2DI into DImode".
;; Rejects the case where both operands are memory.  Alternatives 0 and
;; 1 (register source) print "#" and therefore must be split;
;; alternative 2 (source in offsettable memory, destination a general
;; register) is a plain mov{q} load.
6473 (define_insn "*sse2_storeq_rex64"
6474 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
6475 (vec_select:DI
6476 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
6477 (parallel [(const_int 0)])))]
6478 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6479 "@
6480 #
6481 #
6482 mov{q}\t{%1, %0|%0, %1}"
6483 [(set_attr "type" "*,*,imov")
6484 (set_attr "mode" "*,*,DI")])
6485
;; Extract element 0 of a V2DI register into an SSE register or memory
;; (SSE).  The "#" template means the insn is never printed directly and
;; must be split.
6486 (define_insn "*sse2_storeq"
6487 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
6488 (vec_select:DI
6489 (match_operand:V2DI 1 "register_operand" "x")
6490 (parallel [(const_int 0)])))]
6491 "TARGET_SSE"
6492 "#")
6493
;; Split for "extract element 0 of a V2DI register into DImode".
;; After reload it becomes a plain DImode move whose source is the same
;; hard register viewed in DImode.  The split applies when inter-unit
;; moves are enabled, or the destination is memory, or the destination
;; is not a general register.
6494 (define_split
6495 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6496 (vec_select:DI
6497 (match_operand:V2DI 1 "register_operand" "")
6498 (parallel [(const_int 0)])))]
6499 "TARGET_SSE
6500 && reload_completed
6501 && (TARGET_INTER_UNIT_MOVES
6502 || MEM_P (operands [0])
6503 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6504 [(set (match_dup 0) (match_dup 1))]
6505 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
6506
;; Extract element 1 (the high quadword) of a V2DI value on 64-bit
;; targets.  Rejects the case where both operands are memory.
;; Alternatives, in order:
;;   0: register -> memory, movhps
;;   1: in-place shift right by 8 bytes, psrldq (source tied to dest)
;;   2: three-operand VEX vpsrldq
;;   3: memory -> SSE register, movq from the source address + 8 (%H1)
;;   4: memory -> general register, mov{q} from the source address + 8
6507 (define_insn "*vec_extractv2di_1_rex64"
6508 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
6509 (vec_select:DI
6510 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
6511 (parallel [(const_int 1)])))]
6512 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6513 "@
6514 %vmovhps\t{%1, %0|%0, %1}
6515 psrldq\t{$8, %0|%0, 8}
6516 vpsrldq\t{$8, %1, %0|%0, %1, 8}
6517 %vmovq\t{%H1, %0|%0, %H1}
6518 mov{q}\t{%H1, %0|%0, %H1}"
6519 [(set_attr "isa" "base,noavx,avx,base,base")
6520 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
6521 (set_attr "length_immediate" "*,1,1,*,*")
6522 (set_attr "memory" "*,none,none,*,*")
6523 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
6524 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
6525
;; Extract element 1 (the high quadword) of a V2DI value on 32-bit
;; targets with SSE2.  Rejects the case where both operands are memory.
;; Alternatives, in order:
;;   0: register -> memory, movhps
;;   1: in-place shift right by 8 bytes, psrldq (source tied to dest)
;;   2: three-operand VEX vpsrldq
;;   3: memory -> SSE register, movq from the source address + 8 (%H1)
;; There is no general-register destination in this variant.
6526 (define_insn "*vec_extractv2di_1_sse2"
6527 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x")
6528 (vec_select:DI
6529 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o")
6530 (parallel [(const_int 1)])))]
6531 "!TARGET_64BIT
6532 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6533 "@
6534 %vmovhps\t{%1, %0|%0, %1}
6535 psrldq\t{$8, %0|%0, 8}
6536 vpsrldq\t{$8, %1, %0|%0, %1, 8}
6537 %vmovq\t{%H1, %0|%0, %H1}"
6538 [(set_attr "isa" "base,noavx,avx,base")
6539 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov")
6540 (set_attr "length_immediate" "*,1,1,*")
6541 (set_attr "memory" "*,none,none,*")
6542 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex")
6543 (set_attr "mode" "V2SF,TI,TI,TI")])
6544
6545 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; Extract element 1 (the high quadword) of a V2DI value when only SSE
;; (not SSE2) is available.  Rejects the memory-to-memory case.
;; Alternatives, in order:
;;   0: register -> memory, movhps
;;   1: register -> register, movhlps
;;   2: memory -> register, movlps from the source address + 8 (%H1)
6546 (define_insn "*vec_extractv2di_1_sse"
6547 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
6548 (vec_select:DI
6549 (match_operand:V2DI 1 "nonimmediate_operand" " x,x,o")
6550 (parallel [(const_int 1)])))]
6551 "!TARGET_SSE2 && TARGET_SSE
6552 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6553 "@
6554 movhps\t{%1, %0|%0, %1}
6555 movhlps\t{%1, %0|%0, %1}
6556 movlps\t{%H1, %0|%0, %H1}"
6557 [(set_attr "type" "ssemov")
6558 (set_attr "mode" "V2SF,V4SF,V2SF")])
6559
;; Broadcast an SImode scalar to all four elements of a V4SI register
;; when AVX is available.  Alternative 0 (source in an SSE register)
;; uses vpshufd with an immediate of 0; alternative 1 (source in memory)
;; uses vbroadcastss.
6560 (define_insn "*vec_dupv4si_avx"
6561 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6562 (vec_duplicate:V4SI
6563 (match_operand:SI 1 "nonimmediate_operand" " x,m")))]
6564 "TARGET_AVX"
6565 "@
6566 vpshufd\t{$0, %1, %0|%0, %1, 0}
6567 vbroadcastss\t{%1, %0|%0, %1}"
6568 [(set_attr "type" "sselog1,ssemov")
6569 (set_attr "length_immediate" "1,0")
6570 (set_attr "prefix_extra" "0,1")
6571 (set_attr "prefix" "vex")
6572 (set_attr "mode" "TI,V4SF")])
6573
;; Broadcast an SImode scalar held in a register to all four elements of
;; a V4SI register (SSE).  Alternative 0 uses pshufd with an immediate
;; of 0; alternative 1 ties the source to the destination and uses
;; shufps on the register with itself, also with an immediate of 0.
6574 (define_insn "*vec_dupv4si"
6575 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
6576 (vec_duplicate:V4SI
6577 (match_operand:SI 1 "register_operand" " Y2,0")))]
6578 "TARGET_SSE"
6579 "@
6580 pshufd\t{$0, %1, %0|%0, %1, 0}
6581 shufps\t{$0, %0, %0|%0, %0, 0}"
6582 [(set_attr "type" "sselog1")
6583 (set_attr "length_immediate" "1")
6584 (set_attr "mode" "TI,V4SF")])
6585
;; Broadcast a DImode scalar to both elements of a V2DI register when
;; SSE3 is available.
;; Alternatives, in order:
;;   0: source tied to the destination, punpcklqdq of the register with
;;      itself
;;   1: VEX vpunpcklqdq, with the source printed twice via %d1
;;   2: source in memory, movddup
6586 (define_insn "*vec_dupv2di_sse3"
6587 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
6588 (vec_duplicate:V2DI
6589 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m")))]
6590 "TARGET_SSE3"
6591 "@
6592 punpcklqdq\t%0, %0
6593 vpunpcklqdq\t{%d1, %0|%0, %d1}
6594 %vmovddup\t{%1, %0|%0, %1}"
6595 [(set_attr "isa" "noavx,avx,base")
6596 (set_attr "type" "sselog1")
6597 (set_attr "prefix" "orig,vex,maybe_vex")
6598 (set_attr "mode" "TI,TI,DF")])
6599
;; Generic SSE broadcast of a DImode register into both V2DI elements.
;; Both alternatives tie the source to the destination register ("0"), so
;; each template names operand 0 only.
;;   0: "Y2" destination -> punpcklqdq
;;   1: "x" destination  -> movlhps
6600 (define_insn "*vec_dupv2di"
6601 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
6602 (vec_duplicate:V2DI
6603 (match_operand:DI 1 "register_operand" " 0 ,0")))]
6604 "TARGET_SSE"
6605 "@
6606 punpcklqdq\t%0, %0
6607 movlhps\t%0, %0"
6608 [(set_attr "type" "sselog1,ssemov")
6609 (set_attr "mode" "TI,V4SF")])
6610
;; Build a V2SI vector from two SImode values (operand 1 first, operand 2
;; second in the vec_concat) when SSE4.1 is enabled.
;; Alternatives, in constraint order:
;;   0 (noavx): op1 tied to dest, op2 reg/mem -> pinsrd $1
;;   1 (avx):   op1 in x, op2 reg/mem         -> vpinsrd $1
;;   2 (noavx): op1 tied to dest, op2 in x    -> punpckldq
;;   3 (avx):   op1 in x, op2 in x            -> vpunpckldq
;;   4 (base):  op1 reg/mem, op2 matches "C"  -> %vmovd of op1 only
;;   5 (base):  MMX regs, op1 tied to dest    -> punpckldq
;;   6 (base):  MMX dest, op1 reg/mem, op2 "C" -> movd of op1 only
;; NOTE(review): "C" is presumably the all-zeros constant, so alternatives
;; 4 and 6 would rely on the move clearing the second element; confirm in
;; constraints.md.
6611 (define_insn "*vec_concatv2si_sse4_1"
6612 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
6613 (vec_concat:V2SI
6614 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
6615 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
6616 "TARGET_SSE4_1"
6617 "@
6618 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
6619 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
6620 punpckldq\t{%2, %0|%0, %2}
6621 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
6622 %vmovd\t{%1, %0|%0, %1}
6623 punpckldq\t{%2, %0|%0, %2}
6624 movd\t{%1, %0|%0, %1}"
6625 [(set_attr "isa" "noavx,avx,noavx,avx,base,base,base")
6626 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
6627 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
6628 (set_attr "length_immediate" "1,1,*,*,*,*,*")
6629 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
6630 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
6631
6632 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6633 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
6634 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; SSE2 variant of the V2SI concatenation; operand 2 is restricted to a
;; register or zero (reg_or_0_operand).
;;   0: op1 tied to dest, op2 in x        -> punpckldq
;;   1: op1 reg/mem, op2 matches "C"      -> movd of op1 only
;;   2: MMX regs, op1 tied to dest        -> punpckldq
;;   3: MMX dest, op1 reg/mem, op2 "C"    -> movd of op1 only
6635 (define_insn "*vec_concatv2si_sse2"
6636 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
6637 (vec_concat:V2SI
6638 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
6639 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
6640 "TARGET_SSE2"
6641 "@
6642 punpckldq\t{%2, %0|%0, %2}
6643 movd\t{%1, %0|%0, %1}
6644 punpckldq\t{%2, %0|%0, %2}
6645 movd\t{%1, %0|%0, %1}"
6646 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6647 (set_attr "mode" "TI,TI,DI,DI")])
6648
;; SSE1 variant of the V2SI concatenation.  The two SSE-register
;; alternatives use single-precision float instructions (mode V4SF).
;;   0: op1 tied to dest, op2 in x          -> unpcklps
;;   1: op1 in memory only, op2 matches "C" -> movss
;;   2: MMX regs, op1 tied to dest          -> punpckldq
;;   3: MMX dest, op1 "*rm", op2 "C"        -> movd
6649 (define_insn "*vec_concatv2si_sse"
6650 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
6651 (vec_concat:V2SI
6652 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
6653 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
6654 "TARGET_SSE"
6655 "@
6656 unpcklps\t{%2, %0|%0, %2}
6657 movss\t{%1, %0|%0, %1}
6658 punpckldq\t{%2, %0|%0, %2}
6659 movd\t{%1, %0|%0, %1}"
6660 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6661 (set_attr "mode" "V4SF,V4SF,DI,DI")])
6662
;; AVX form of concatenating two V2SI halves into a V4SI register, using
;; three-operand VEX instructions (operand 1 need not match the destination).
;;   0: operand 2 in an SSE register -> vpunpcklqdq
;;   1: operand 2 in memory          -> vmovhps
6663 (define_insn "*vec_concatv4si_1_avx"
6664 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6665 (vec_concat:V4SI
6666 (match_operand:V2SI 1 "register_operand" " x,x")
6667 (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
6668 "TARGET_AVX"
6669 "@
6670 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6671 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6672 [(set_attr "type" "sselog,ssemov")
6673 (set_attr "prefix" "vex")
6674 (set_attr "mode" "TI,V2SF")])
6675
;; Non-VEX form of concatenating two V2SI halves into a V4SI register.
;; Operand 1 is tied to the destination in every alternative.
;;   0: "Y2" registers, operand 2 in Y2 -> punpcklqdq
;;   1: operand 2 in x                  -> movlhps
;;   2: operand 2 in memory             -> movhps
6676 (define_insn "*vec_concatv4si_1"
6677 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
6678 (vec_concat:V4SI
6679 (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
6680 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
6681 "TARGET_SSE"
6682 "@
6683 punpcklqdq\t{%2, %0|%0, %2}
6684 movlhps\t{%2, %0|%0, %2}
6685 movhps\t{%2, %0|%0, %2}"
6686 [(set_attr "type" "sselog,ssemov,ssemov")
6687 (set_attr "mode" "TI,V4SF,V2SF")])
6688
6689 ;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI vector from two DImode values on 64-bit targets with
;; SSE4.1.  Alternatives, in constraint order:
;;   0 (noavx): op1 tied to dest, op2 reg/mem   -> pinsrq $1
;;   1 (avx):   op1 in x, op2 reg/mem           -> vpinsrq $1
;;   2 (base):  op1 in x or memory, op2 "C"     -> %vmovq
;;   3 (base):  dest "Yi", op1 in a GPR, op2 "C" -> %vmovd (see note above)
;;   4 (base):  op1 in an MMX register, op2 "C" -> movq2dq
;;   5 (noavx): op1 tied to dest, op2 in x      -> punpcklqdq
;;   6 (avx):   op1 in x, op2 in x              -> vpunpcklqdq
;;   7 (noavx): op1 tied to dest, op2 in memory -> movhps
;;   8 (avx):   op1 in x, op2 in memory         -> vmovhps
6690 (define_insn "*vec_concatv2di_rex64_sse4_1"
6691 [(set (match_operand:V2DI 0 "register_operand"
6692 "=x, x, x,Yi,!x,x,x,x,x")
6693 (vec_concat:V2DI
6694 (match_operand:DI 1 "nonimmediate_operand"
6695 " 0, x,xm,r ,*y,0,x,0,x")
6696 (match_operand:DI 2 "vector_move_operand"
6697 "rm,rm, C,C ,C ,x,x,m,m")))]
6698 "TARGET_64BIT && TARGET_SSE4_1"
6699 "@
6700 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
6701 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
6702 %vmovq\t{%1, %0|%0, %1}
6703 %vmovd\t{%1, %0|%0, %1}
6704 movq2dq\t{%1, %0|%0, %1}
6705 punpcklqdq\t{%2, %0|%0, %2}
6706 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6707 movhps\t{%2, %0|%0, %2}
6708 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6709 [(set_attr "isa" "noavx,avx,base,base,base,noavx,avx,noavx,avx")
6710 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov,sselog,sselog,ssemov,ssemov")
;; prefix_rex is forced to "1" only for alternatives 0 and 3, and only
;; when TARGET_AVX is off; every other case keeps the default ("*").
6711 (set (attr "prefix_rex")
6712 (if_then_else
6713 (and (eq_attr "alternative" "0,3")
6714 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
6715 (const_string "1")
6716 (const_string "*")))
6717 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
6718 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
6719 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
6720 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
6721
6722 ;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI vector from two DImode values on 64-bit targets with plain
;; SSE (no VEX alternatives).  Alternatives, in constraint order:
;;   0: op1 in Y2 or memory, op2 "C"      -> movq
;;   1: dest "Yi", op1 in a GPR, op2 "C"  -> movd (see note above)
;;   2: op1 in an MMX register, op2 "C"   -> movq2dq
;;   3: op1 tied to dest, op2 in Y2       -> punpcklqdq
;;   4: op1 tied to dest, op2 in x        -> movlhps
;;   5: op1 tied to dest, op2 in memory   -> movhps
;; Only alternative 1 sets prefix_rex to "1".
6723 (define_insn "*vec_concatv2di_rex64_sse"
6724 [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x")
6725 (vec_concat:V2DI
6726 (match_operand:DI 1 "nonimmediate_operand" "Y2m,r ,*y ,0 ,0,0")
6727 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
6728 "TARGET_64BIT && TARGET_SSE"
6729 "@
6730 movq\t{%1, %0|%0, %1}
6731 movd\t{%1, %0|%0, %1}
6732 movq2dq\t{%1, %0|%0, %1}
6733 punpcklqdq\t{%2, %0|%0, %2}
6734 movlhps\t{%2, %0|%0, %2}
6735 movhps\t{%2, %0|%0, %2}"
6736 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
6737 (set_attr "prefix_rex" "*,1,*,*,*,*")
6738 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
6739
;; Build a V2DI vector from two DImode values on 32-bit targets
;; (!TARGET_64BIT).  Unlike the rex64 variants there is no alternative that
;; takes operand 1 from a general register.
;;   0 (base):  op1 in Y2 or memory, op2 "C"    -> %vmovq
;;   1 (base):  op1 in an MMX register, op2 "C" -> movq2dq
;;   2 (noavx): op1 tied to dest, op2 in Y2     -> punpcklqdq
;;   3 (avx):   op1 in x, op2 in x              -> vpunpcklqdq
;;   4 (noavx): op1 tied to dest, op2 in x      -> movlhps
;;   5 (noavx): op1 tied to dest, op2 in memory -> movhps
;;   6 (avx):   op1 in x, op2 in memory         -> vmovhps
6740 (define_insn "vec_concatv2di"
6741 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x,x")
6742 (vec_concat:V2DI
6743 (match_operand:DI 1 "nonimmediate_operand" "Y2m,*y , 0,x,0,0,x")
6744 (match_operand:DI 2 "vector_move_operand" " C , C ,Y2,x,x,m,m")))]
6745 "!TARGET_64BIT && TARGET_SSE"
6746 "@
6747 %vmovq\t{%1, %0|%0, %1}
6748 movq2dq\t{%1, %0|%0, %1}
6749 punpcklqdq\t{%2, %0|%0, %2}
6750 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6751 movlhps\t{%2, %0|%0, %2}
6752 movhps\t{%2, %0|%0, %2}
6753 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6754 [(set_attr "isa" "base,base,noavx,avx,noavx,noavx,avx")
6755 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
6756 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
6757 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
6758
;; Expander for vec_unpacks_lo over the VI124_128 modes; the destination
;; uses the corresponding <sseunpackmode>.  All work is delegated to
;; ix86_expand_sse_unpack with flags (false, false).
;; NOTE(review): going by the four sibling expanders, the first flag
;; presumably selects unsigned extension and the second the high half;
;; confirm against ix86_expand_sse_unpack in i386.c.
6759 (define_expand "vec_unpacks_lo_<mode>"
6760 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6761 (match_operand:VI124_128 1 "register_operand" "")]
6762 "TARGET_SSE2"
6763 "ix86_expand_sse_unpack (operands, false, false); DONE;")
6764
;; Expander for vec_unpacks_hi over the VI124_128 modes; the destination
;; uses the corresponding <sseunpackmode>.  All work is delegated to
;; ix86_expand_sse_unpack with flags (false, true).
;; NOTE(review): the flags presumably mean (unsigned_p, high_p); confirm
;; against ix86_expand_sse_unpack in i386.c.
6765 (define_expand "vec_unpacks_hi_<mode>"
6766 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6767 (match_operand:VI124_128 1 "register_operand" "")]
6768 "TARGET_SSE2"
6769 "ix86_expand_sse_unpack (operands, false, true); DONE;")
6770
;; Expander for vec_unpacku_lo over the VI124_128 modes; the destination
;; uses the corresponding <sseunpackmode>.  All work is delegated to
;; ix86_expand_sse_unpack with flags (true, false).
;; NOTE(review): the flags presumably mean (unsigned_p, high_p); confirm
;; against ix86_expand_sse_unpack in i386.c.
6771 (define_expand "vec_unpacku_lo_<mode>"
6772 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6773 (match_operand:VI124_128 1 "register_operand" "")]
6774 "TARGET_SSE2"
6775 "ix86_expand_sse_unpack (operands, true, false); DONE;")
6776
;; Expander for vec_unpacku_hi over the VI124_128 modes; the destination
;; uses the corresponding <sseunpackmode>.  All work is delegated to
;; ix86_expand_sse_unpack with flags (true, true).
;; NOTE(review): the flags presumably mean (unsigned_p, high_p); confirm
;; against ix86_expand_sse_unpack in i386.c.
6777 (define_expand "vec_unpacku_hi_<mode>"
6778 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6779 (match_operand:VI124_128 1 "register_operand" "")]
6780 "TARGET_SSE2"
6781 "ix86_expand_sse_unpack (operands, true, true); DONE;")
6782
6783 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6784 ;;
6785 ;; Miscellaneous
6786 ;;
6787 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6788
;; Expander for the unsigned byte average.  The RTL zero-extends both
;; V16QI inputs to V16HI, adds them, adds a vector of ones, shifts right
;; logically by 1 and truncates back to V16QI, i.e. (a + b + 1) >> 1 per
;; element computed without overflow.  The preparation statement only
;; calls ix86_fixup_binary_operands_no_copy for PLUS; the pattern itself
;; is then emitted.
;; NOTE(review): the vector of ones is spelled const_vector:V16QI although
;; it is an operand of plus:V16HI.  The matching *sse2_uavgv16qi3 insn
;; uses the same spelling, so the two must be changed together if at all.
6789 (define_expand "sse2_uavgv16qi3"
6790 [(set (match_operand:V16QI 0 "register_operand" "")
6791 (truncate:V16QI
6792 (lshiftrt:V16HI
6793 (plus:V16HI
6794 (plus:V16HI
6795 (zero_extend:V16HI
6796 (match_operand:V16QI 1 "nonimmediate_operand" ""))
6797 (zero_extend:V16HI
6798 (match_operand:V16QI 2 "nonimmediate_operand" "")))
6799 (const_vector:V16QI [(const_int 1) (const_int 1)
6800 (const_int 1) (const_int 1)
6801 (const_int 1) (const_int 1)
6802 (const_int 1) (const_int 1)
6803 (const_int 1) (const_int 1)
6804 (const_int 1) (const_int 1)
6805 (const_int 1) (const_int 1)
6806 (const_int 1) (const_int 1)]))
6807 (const_int 1))))]
6808 "TARGET_SSE2"
6809 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
6810
;; Insn for the unsigned byte average: zero-extend both inputs, add, add a
;; vector of ones, shift right by 1, truncate.  Operand 1 is marked
;; commutative ("%"), and the condition additionally requires
;; ix86_binary_operator_ok for PLUS.
;;   0 (noavx): operand 1 tied to the destination -> pavgb
;;   1 (avx):   three-operand form                -> vpavgb
;; NOTE(review): the vector of ones is spelled const_vector:V16QI although
;; it is an operand of plus:V16HI.  The sse2_uavgv16qi3 expander uses the
;; same spelling, so the two must be changed together if at all.
6811 (define_insn "*sse2_uavgv16qi3"
6812 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6813 (truncate:V16QI
6814 (lshiftrt:V16HI
6815 (plus:V16HI
6816 (plus:V16HI
6817 (zero_extend:V16HI
6818 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
6819 (zero_extend:V16HI
6820 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
6821 (const_vector:V16QI [(const_int 1) (const_int 1)
6822 (const_int 1) (const_int 1)
6823 (const_int 1) (const_int 1)
6824 (const_int 1) (const_int 1)
6825 (const_int 1) (const_int 1)
6826 (const_int 1) (const_int 1)
6827 (const_int 1) (const_int 1)
6828 (const_int 1) (const_int 1)]))
6829 (const_int 1))))]
6830 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
6831 "@
6832 pavgb\t{%2, %0|%0, %2}
6833 vpavgb\t{%2, %1, %0|%0, %1, %2}"
6834 [(set_attr "isa" "noavx,avx")
6835 (set_attr "type" "sseiadd")
6836 (set_attr "prefix_data16" "1,*")
6837 (set_attr "prefix" "orig,vex")
6838 (set_attr "mode" "TI")])
6839
;; Expander for the unsigned 16-bit average.  The RTL zero-extends both
;; V8HI inputs to V8SI, adds them, adds a vector of ones, shifts right
;; logically by 1 and truncates back to V8HI, i.e. (a + b + 1) >> 1 per
;; element.  The preparation statement only calls
;; ix86_fixup_binary_operands_no_copy for PLUS.
;; NOTE(review): the vector of ones is spelled const_vector:V8HI although
;; it is an operand of plus:V8SI.  The matching *sse2_uavgv8hi3 insn uses
;; the same spelling, so the two must be changed together if at all.
6840 (define_expand "sse2_uavgv8hi3"
6841 [(set (match_operand:V8HI 0 "register_operand" "")
6842 (truncate:V8HI
6843 (lshiftrt:V8SI
6844 (plus:V8SI
6845 (plus:V8SI
6846 (zero_extend:V8SI
6847 (match_operand:V8HI 1 "nonimmediate_operand" ""))
6848 (zero_extend:V8SI
6849 (match_operand:V8HI 2 "nonimmediate_operand" "")))
6850 (const_vector:V8HI [(const_int 1) (const_int 1)
6851 (const_int 1) (const_int 1)
6852 (const_int 1) (const_int 1)
6853 (const_int 1) (const_int 1)]))
6854 (const_int 1))))]
6855 "TARGET_SSE2"
6856 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
6857
;; Insn for the unsigned 16-bit average: zero-extend both inputs, add, add
;; a vector of ones, shift right by 1, truncate.  Operand 1 is marked
;; commutative ("%"), and the condition additionally requires
;; ix86_binary_operator_ok for PLUS.
;;   0 (noavx): operand 1 tied to the destination -> pavgw
;;   1 (avx):   three-operand form                -> vpavgw
;; NOTE(review): the vector of ones is spelled const_vector:V8HI although
;; it is an operand of plus:V8SI.  The sse2_uavgv8hi3 expander uses the
;; same spelling, so the two must be changed together if at all.
6858 (define_insn "*sse2_uavgv8hi3"
6859 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6860 (truncate:V8HI
6861 (lshiftrt:V8SI
6862 (plus:V8SI
6863 (plus:V8SI
6864 (zero_extend:V8SI
6865 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
6866 (zero_extend:V8SI
6867 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
6868 (const_vector:V8HI [(const_int 1) (const_int 1)
6869 (const_int 1) (const_int 1)
6870 (const_int 1) (const_int 1)
6871 (const_int 1) (const_int 1)]))
6872 (const_int 1))))]
6873 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
6874 "@
6875 pavgw\t{%2, %0|%0, %2}
6876 vpavgw\t{%2, %1, %0|%0, %1, %2}"
6877 [(set_attr "isa" "noavx,avx")
6878 (set_attr "type" "sseiadd")
6879 (set_attr "prefix_data16" "1,*")
6880 (set_attr "prefix" "orig,vex")
6881 (set_attr "mode" "TI")])
6882
6883 ;; The correct representation for this is absolutely enormous, and
6884 ;; surely not generally useful.
;; PSADBW is therefore modelled as an opaque unspec (UNSPEC_PSADBW) taking
;; two V16QI inputs and producing a V2DI result.
;;   0 (noavx): operand 1 tied to the destination -> psadbw
;;   1 (avx):   three-operand form                -> vpsadbw
6885 (define_insn "sse2_psadbw"
6886 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6887 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0,x")
6888 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")]
6889 UNSPEC_PSADBW))]
6890 "TARGET_SSE2"
6891 "@
6892 psadbw\t{%2, %0|%0, %2}
6893 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
6894 [(set_attr "isa" "noavx,avx")
6895 (set_attr "type" "sseiadd")
6896 (set_attr "atom_unit" "simul")
6897 (set_attr "prefix_data16" "1,*")
6898 (set_attr "prefix" "orig,vex")
6899 (set_attr "mode" "TI")])
6900
;; MOVMSKPS/MOVMSKPD family, instantiated for every mode of the VF
;; iterator.  The operation is an opaque unspec (UNSPEC_MOVMSK) from a
;; vector float register to an SImode general register; the mnemonic
;; suffix comes from <ssemodesuffix>.
6901 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
6902 [(set (match_operand:SI 0 "register_operand" "=r")
6903 (unspec:SI
6904 [(match_operand:VF 1 "register_operand" "x")]
6905 UNSPEC_MOVMSK))]
6906 "TARGET_SSE"
6907 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
6908 [(set_attr "type" "ssemov")
6909 (set_attr "prefix" "maybe_vex")
6910 (set_attr "mode" "<MODE>")])
6911
;; PMOVMSKB: opaque unspec (UNSPEC_MOVMSK, shared with the float movmsk
;; pattern) from a V16QI SSE register to an SImode general register.
6912 (define_insn "sse2_pmovmskb"
6913 [(set (match_operand:SI 0 "register_operand" "=r")
6914 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
6915 UNSPEC_MOVMSK))]
6916 "TARGET_SSE2"
6917 "%vpmovmskb\t{%1, %0|%0, %1}"
6918 [(set_attr "type" "ssemov")
6919 (set_attr "prefix_data16" "1")
6920 (set_attr "prefix" "maybe_vex")
6921 (set_attr "mode" "SI")])
6922
;; Expander for MASKMOVDQU.  It has no preparation statements: it simply
;; emits a set of the V16QI memory operand 0 to UNSPEC_MASKMOV of the two
;; register operands and the old contents of that same memory
;; (match_dup 0), so the store is modelled as a read-modify-write.
6923 (define_expand "sse2_maskmovdqu"
6924 [(set (match_operand:V16QI 0 "memory_operand" "")
6925 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
6926 (match_operand:V16QI 2 "register_operand" "")
6927 (match_dup 0)]
6928 UNSPEC_MASKMOV))]
6929 "TARGET_SSE2")
6930
;; Insn for MASKMOVDQU.  The destination address is operand 0, a pointer
;; (mode iterator P) pinned by the "D" constraint; it does not appear in
;; the output template, which prints only operands 1 and 2.  length_vex
;; is computed by hand from operand 2 for the reason given in the comment
;; inside the attribute list.
6931 (define_insn "*sse2_maskmovdqu"
6932 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
6933 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
6934 (match_operand:V16QI 2 "register_operand" "x")
6935 (mem:V16QI (match_dup 0))]
6936 UNSPEC_MASKMOV))]
6937 "TARGET_SSE2"
6938 "%vmaskmovdqu\t{%2, %1|%1, %2}"
6939 [(set_attr "type" "ssemov")
6940 (set_attr "prefix_data16" "1")
6941 ;; The implicit %rdi operand confuses default length_vex computation.
6942 (set (attr "length_vex")
6943 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
6944 (set_attr "prefix" "maybe_vex")
6945 (set_attr "mode" "TI")])
6946
;; LDMXCSR: unspec_volatile (UNSPECV_LDMXCSR) whose only operand is an
;; SImode memory location; the "memory" attribute is "load".
6947 (define_insn "sse_ldmxcsr"
6948 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
6949 UNSPECV_LDMXCSR)]
6950 "TARGET_SSE"
6951 "%vldmxcsr\t%0"
6952 [(set_attr "type" "sse")
6953 (set_attr "atom_sse_attr" "mxcsr")
6954 (set_attr "prefix" "maybe_vex")
6955 (set_attr "memory" "load")])
6956
;; STMXCSR: sets an SImode memory location from an unspec_volatile
;; (UNSPECV_STMXCSR) that has no real inputs (dummy const_int 0); the
;; "memory" attribute is "store".
6957 (define_insn "sse_stmxcsr"
6958 [(set (match_operand:SI 0 "memory_operand" "=m")
6959 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
6960 "TARGET_SSE"
6961 "%vstmxcsr\t%0"
6962 [(set_attr "type" "sse")
6963 (set_attr "atom_sse_attr" "mxcsr")
6964 (set_attr "prefix" "maybe_vex")
6965 (set_attr "memory" "store")])
6966
;; Expander for SFENCE.  The fence is modelled as a BLKmode memory being
;; set to an unspec of itself.  Operand 0 is not supplied by the caller:
;; the preparation code creates a BLKmode MEM with a SCRATCH address and
;; marks it volatile.
6967 (define_expand "sse_sfence"
6968 [(set (match_dup 0)
6969 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
6970 "TARGET_SSE || TARGET_3DNOW_A"
6971 {
6972 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
6973 MEM_VOLATILE_P (operands[0]) = 1;
6974 })
6975
;; Insn matching the RTL produced by the sse_sfence expander.  Operand 0
;; has an empty predicate and empty constraint, and the template emits the
;; bare "sfence" mnemonic; length_address is forced to 0 and memory is
;; "unknown".
6976 (define_insn "*sse_sfence"
6977 [(set (match_operand:BLK 0 "" "")
6978 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
6979 "TARGET_SSE || TARGET_3DNOW_A"
6980 "sfence"
6981 [(set_attr "type" "sse")
6982 (set_attr "length_address" "0")
6983 (set_attr "atom_sse_attr" "fence")
6984 (set_attr "memory" "unknown")])
6985
;; CLFLUSH: unspec_volatile (UNSPECV_CLFLUSH) taking an address operand
;; ("p" constraint), printed with the %a modifier.
6986 (define_insn "sse2_clflush"
6987 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
6988 UNSPECV_CLFLUSH)]
6989 "TARGET_SSE2"
6990 "clflush\t%a0"
6991 [(set_attr "type" "sse")
6992 (set_attr "atom_sse_attr" "fence")
6993 (set_attr "memory" "unknown")])
6994
;; Expander for MFENCE, built the same way as sse_sfence: the preparation
;; code creates a volatile BLKmode MEM with a SCRATCH address as operand 0,
;; and the pattern sets that MEM to UNSPEC_MFENCE of itself.
6995 (define_expand "sse2_mfence"
6996 [(set (match_dup 0)
6997 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
6998 "TARGET_SSE2"
6999 {
7000 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7001 MEM_VOLATILE_P (operands[0]) = 1;
7002 })
7003
;; Insn emitting the bare "mfence" mnemonic for the UNSPEC_MFENCE pattern.
;; NOTE(review): the condition here is "TARGET_64BIT || TARGET_SSE2",
;; wider than the sse2_mfence expander's "TARGET_SSE2".  Presumably this
;; is deliberate so that other generators of this RTL can match on any
;; 64-bit target; confirm before tightening.
7004 (define_insn "*sse2_mfence"
7005 [(set (match_operand:BLK 0 "" "")
7006 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7007 "TARGET_64BIT || TARGET_SSE2"
7008 "mfence"
7009 [(set_attr "type" "sse")
7010 (set_attr "length_address" "0")
7011 (set_attr "atom_sse_attr" "fence")
7012 (set_attr "memory" "unknown")])
7013
;; Expander for LFENCE, built the same way as sse_sfence: the preparation
;; code creates a volatile BLKmode MEM with a SCRATCH address as operand 0,
;; and the pattern sets that MEM to UNSPEC_LFENCE of itself.
7014 (define_expand "sse2_lfence"
7015 [(set (match_dup 0)
7016 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7017 "TARGET_SSE2"
7018 {
7019 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7020 MEM_VOLATILE_P (operands[0]) = 1;
7021 })
7022
;; Insn emitting the bare "lfence" mnemonic for the UNSPEC_LFENCE pattern.
;; Unlike the sfence and mfence insns, atom_sse_attr is "lfence" rather
;; than "fence".
7023 (define_insn "*sse2_lfence"
7024 [(set (match_operand:BLK 0 "" "")
7025 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7026 "TARGET_SSE2"
7027 "lfence"
7028 [(set_attr "type" "sse")
7029 (set_attr "length_address" "0")
7030 (set_attr "atom_sse_attr" "lfence")
7031 (set_attr "memory" "unknown")])
7032
;; MWAIT: unspec_volatile (UNSPECV_MWAIT) with two SImode register
;; operands pinned by the "a" and "c" constraints.  The template prints no
;; operands, and the insn length is fixed at 3.  One pattern serves both
;; 32-bit and 64-bit targets, for the reason given below.
7033 (define_insn "sse3_mwait"
7034 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7035 (match_operand:SI 1 "register_operand" "c")]
7036 UNSPECV_MWAIT)]
7037 "TARGET_SSE3"
7038 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
7039 ;; Since 32bit register operands are implicitly zero extended to 64bit,
7040 ;; we only need to set up 32bit registers.
7041 "mwait"
7042 [(set_attr "length" "3")])
7043
;; MONITOR for 32-bit targets (!TARGET_64BIT): unspec_volatile
;; (UNSPECV_MONITOR) with three SImode register operands pinned by the
;; "a", "c" and "d" constraints.  This variant prints all three operands
;; explicitly; the insn length is fixed at 3.
7044 (define_insn "sse3_monitor"
7045 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7046 (match_operand:SI 1 "register_operand" "c")
7047 (match_operand:SI 2 "register_operand" "d")]
7048 UNSPECV_MONITOR)]
7049 "TARGET_SSE3 && !TARGET_64BIT"
7050 "monitor\t%0, %1, %2"
7051 [(set_attr "length" "3")])
7052
;; MONITOR for 64-bit targets: same unspec as sse3_monitor, but operand 0
;; (constraint "a") is DImode while operands 1 and 2 stay SImode.  The
;; template prints the bare mnemonic with no operands; the insn length is
;; fixed at 3.
7053 (define_insn "sse3_monitor64"
7054 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
7055 (match_operand:SI 1 "register_operand" "c")
7056 (match_operand:SI 2 "register_operand" "d")]
7057 UNSPECV_MONITOR)]
7058 "TARGET_SSE3 && TARGET_64BIT"
7059 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
7060 ;; RCX and RDX are used. Since 32bit register operands are implicitly
7061 ;; zero extended to 64bit, we only need to set up 32bit registers.
7062 "monitor"
7063 [(set_attr "length" "3")])
7064
7065 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7066 ;;
7067 ;; SSSE3 instructions
7068 ;;
7069 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7070
;; SSSE3 horizontal add of 16-bit elements, 128-bit form.  Each result
;; element is the plus:HI sum of one adjacent element pair: pairs (0,1),
;; (2,3), (4,5), (6,7) of operand 1 form the first V4HI half of the
;; result, and the same pairs of operand 2 form the second half.
;;   0 (noavx): operand 1 tied to the destination -> phaddw
;;   1 (avx):   three-operand form                -> vphaddw
7071 (define_insn "ssse3_phaddwv8hi3"
7072 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7073 (vec_concat:V8HI
;; First half: pairwise sums of operand 1.
7074 (vec_concat:V4HI
7075 (vec_concat:V2HI
7076 (plus:HI
7077 (vec_select:HI
7078 (match_operand:V8HI 1 "register_operand" "0,x")
7079 (parallel [(const_int 0)]))
7080 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7081 (plus:HI
7082 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7083 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7084 (vec_concat:V2HI
7085 (plus:HI
7086 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7087 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7088 (plus:HI
7089 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7090 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
;; Second half: pairwise sums of operand 2.
7091 (vec_concat:V4HI
7092 (vec_concat:V2HI
7093 (plus:HI
7094 (vec_select:HI
7095 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7096 (parallel [(const_int 0)]))
7097 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7098 (plus:HI
7099 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7100 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7101 (vec_concat:V2HI
7102 (plus:HI
7103 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7104 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7105 (plus:HI
7106 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7107 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7108 "TARGET_SSSE3"
7109 "@
7110 phaddw\t{%2, %0|%0, %2}
7111 vphaddw\t{%2, %1, %0|%0, %1, %2}"
7112 [(set_attr "isa" "noavx,avx")
7113 (set_attr "type" "sseiadd")
7114 (set_attr "atom_unit" "complex")
7115 (set_attr "prefix_data16" "1,*")
7116 (set_attr "prefix_extra" "1")
7117 (set_attr "prefix" "orig,vex")
7118 (set_attr "mode" "TI")])
7119
;; SSSE3 horizontal add of 16-bit elements, 64-bit MMX form ("y"
;; registers).  Pairs (0,1) and (2,3) of operand 1 form the first V2HI
;; half of the result, the same pairs of operand 2 the second half.
;; Operand 1 is tied to the destination; there is a single alternative.
;; prefix_rex is computed at run time via x86_extended_reg_mentioned_p.
7120 (define_insn "ssse3_phaddwv4hi3"
7121 [(set (match_operand:V4HI 0 "register_operand" "=y")
7122 (vec_concat:V4HI
7123 (vec_concat:V2HI
7124 (plus:HI
7125 (vec_select:HI
7126 (match_operand:V4HI 1 "register_operand" "0")
7127 (parallel [(const_int 0)]))
7128 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7129 (plus:HI
7130 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7131 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7132 (vec_concat:V2HI
7133 (plus:HI
7134 (vec_select:HI
7135 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7136 (parallel [(const_int 0)]))
7137 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7138 (plus:HI
7139 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7140 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7141 "TARGET_SSSE3"
7142 "phaddw\t{%2, %0|%0, %2}"
7143 [(set_attr "type" "sseiadd")
7144 (set_attr "atom_unit" "complex")
7145 (set_attr "prefix_extra" "1")
7146 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7147 (set_attr "mode" "DI")])
7148
;; SSSE3 horizontal add of 32-bit elements, 128-bit form.  Pairs (0,1) and
;; (2,3) of operand 1 are summed (plus:SI) into the first V2SI half of the
;; result, the same pairs of operand 2 into the second half.
;;   0 (noavx): operand 1 tied to the destination -> phaddd
;;   1 (avx):   three-operand form                -> vphaddd
7149 (define_insn "ssse3_phadddv4si3"
7150 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7151 (vec_concat:V4SI
7152 (vec_concat:V2SI
7153 (plus:SI
7154 (vec_select:SI
7155 (match_operand:V4SI 1 "register_operand" "0,x")
7156 (parallel [(const_int 0)]))
7157 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7158 (plus:SI
7159 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7160 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7161 (vec_concat:V2SI
7162 (plus:SI
7163 (vec_select:SI
7164 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7165 (parallel [(const_int 0)]))
7166 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7167 (plus:SI
7168 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7169 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7170 "TARGET_SSSE3"
7171 "@
7172 phaddd\t{%2, %0|%0, %2}
7173 vphaddd\t{%2, %1, %0|%0, %1, %2}"
7174 [(set_attr "isa" "noavx,avx")
7175 (set_attr "type" "sseiadd")
7176 (set_attr "atom_unit" "complex")
7177 (set_attr "prefix_data16" "1,*")
7178 (set_attr "prefix_extra" "1")
7179 (set_attr "prefix" "orig,vex")
7180 (set_attr "mode" "TI")])
7181
;; SSSE3 horizontal add of 32-bit elements, 64-bit MMX form ("y"
;; registers).  Result element 0 is the sum of operand 1's two elements,
;; result element 1 the sum of operand 2's two elements.  Operand 1 is
;; tied to the destination.  prefix_rex is computed at run time via
;; x86_extended_reg_mentioned_p.
7182 (define_insn "ssse3_phadddv2si3"
7183 [(set (match_operand:V2SI 0 "register_operand" "=y")
7184 (vec_concat:V2SI
7185 (plus:SI
7186 (vec_select:SI
7187 (match_operand:V2SI 1 "register_operand" "0")
7188 (parallel [(const_int 0)]))
7189 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7190 (plus:SI
7191 (vec_select:SI
7192 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
7193 (parallel [(const_int 0)]))
7194 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
7195 "TARGET_SSSE3"
7196 "phaddd\t{%2, %0|%0, %2}"
7197 [(set_attr "type" "sseiadd")
7198 (set_attr "atom_unit" "complex")
7199 (set_attr "prefix_extra" "1")
7200 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7201 (set_attr "mode" "DI")])
7202
;; SSSE3 horizontal add of 16-bit elements with signed saturation
;; (ss_plus:HI), 128-bit form.  Pairs (0,1), (2,3), (4,5), (6,7) of
;; operand 1 form the first V4HI half of the result, and the same pairs of
;; operand 2 form the second half.
;;   0 (noavx): operand 1 tied to the destination -> phaddsw
;;   1 (avx):   three-operand form                -> vphaddsw
7203 (define_insn "ssse3_phaddswv8hi3"
7204 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7205 (vec_concat:V8HI
;; First half: saturating pairwise sums of operand 1.
7206 (vec_concat:V4HI
7207 (vec_concat:V2HI
7208 (ss_plus:HI
7209 (vec_select:HI
7210 (match_operand:V8HI 1 "register_operand" "0,x")
7211 (parallel [(const_int 0)]))
7212 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7213 (ss_plus:HI
7214 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7215 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7216 (vec_concat:V2HI
7217 (ss_plus:HI
7218 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7219 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7220 (ss_plus:HI
7221 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7222 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
;; Second half: saturating pairwise sums of operand 2.
7223 (vec_concat:V4HI
7224 (vec_concat:V2HI
7225 (ss_plus:HI
7226 (vec_select:HI
7227 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7228 (parallel [(const_int 0)]))
7229 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7230 (ss_plus:HI
7231 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7232 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7233 (vec_concat:V2HI
7234 (ss_plus:HI
7235 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7236 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7237 (ss_plus:HI
7238 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7239 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7240 "TARGET_SSSE3"
7241 "@
7242 phaddsw\t{%2, %0|%0, %2}
7243 vphaddsw\t{%2, %1, %0|%0, %1, %2}"
7244 [(set_attr "isa" "noavx,avx")
7245 (set_attr "type" "sseiadd")
7246 (set_attr "atom_unit" "complex")
7247 (set_attr "prefix_data16" "1,*")
7248 (set_attr "prefix_extra" "1")
7249 (set_attr "prefix" "orig,vex")
7250 (set_attr "mode" "TI")])
7251
;; SSSE3 saturating horizontal add (ss_plus:HI) of 16-bit elements, 64-bit
;; MMX form ("y" registers).  Pairs (0,1) and (2,3) of operand 1 form the
;; first V2HI half of the result, the same pairs of operand 2 the second
;; half.  Operand 1 is tied to the destination.  prefix_rex is computed at
;; run time via x86_extended_reg_mentioned_p.
7252 (define_insn "ssse3_phaddswv4hi3"
7253 [(set (match_operand:V4HI 0 "register_operand" "=y")
7254 (vec_concat:V4HI
7255 (vec_concat:V2HI
7256 (ss_plus:HI
7257 (vec_select:HI
7258 (match_operand:V4HI 1 "register_operand" "0")
7259 (parallel [(const_int 0)]))
7260 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7261 (ss_plus:HI
7262 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7263 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7264 (vec_concat:V2HI
7265 (ss_plus:HI
7266 (vec_select:HI
7267 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7268 (parallel [(const_int 0)]))
7269 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7270 (ss_plus:HI
7271 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7272 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7273 "TARGET_SSSE3"
7274 "phaddsw\t{%2, %0|%0, %2}"
7275 [(set_attr "type" "sseiadd")
7276 (set_attr "atom_unit" "complex")
7277 (set_attr "prefix_extra" "1")
7278 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7279 (set_attr "mode" "DI")])
7280
;; SSSE3 horizontal subtract of 16-bit elements, 128-bit form.  Each
;; result element is (minus:HI even odd) of one adjacent pair, i.e.
;; element 2k minus element 2k+1.  The four pairs of operand 1 form the
;; first V4HI half of the result, those of operand 2 the second half.
;;   0 (noavx): operand 1 tied to the destination -> phsubw
;;   1 (avx):   three-operand form                -> vphsubw
7281 (define_insn "ssse3_phsubwv8hi3"
7282 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7283 (vec_concat:V8HI
;; First half: pairwise differences of operand 1.
7284 (vec_concat:V4HI
7285 (vec_concat:V2HI
7286 (minus:HI
7287 (vec_select:HI
7288 (match_operand:V8HI 1 "register_operand" "0,x")
7289 (parallel [(const_int 0)]))
7290 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7291 (minus:HI
7292 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7293 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7294 (vec_concat:V2HI
7295 (minus:HI
7296 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7297 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7298 (minus:HI
7299 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7300 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
;; Second half: pairwise differences of operand 2.
7301 (vec_concat:V4HI
7302 (vec_concat:V2HI
7303 (minus:HI
7304 (vec_select:HI
7305 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7306 (parallel [(const_int 0)]))
7307 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7308 (minus:HI
7309 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7310 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7311 (vec_concat:V2HI
7312 (minus:HI
7313 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7314 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7315 (minus:HI
7316 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7317 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7318 "TARGET_SSSE3"
7319 "@
7320 phsubw\t{%2, %0|%0, %2}
7321 vphsubw\t{%2, %1, %0|%0, %1, %2}"
7322 [(set_attr "isa" "noavx,avx")
7323 (set_attr "type" "sseiadd")
7324 (set_attr "atom_unit" "complex")
7325 (set_attr "prefix_data16" "1,*")
7326 (set_attr "prefix_extra" "1")
7327 (set_attr "prefix" "orig,vex")
7328 (set_attr "mode" "TI")])
7329
;; SSSE3 horizontal subtract of 16-bit elements, 64-bit MMX form ("y"
;; registers).  Each result element is element 2k minus element 2k+1 of a
;; source; operand 1's two pairs form the first V2HI half, operand 2's the
;; second half.  Operand 1 is tied to the destination.  prefix_rex is
;; computed at run time via x86_extended_reg_mentioned_p.
7330 (define_insn "ssse3_phsubwv4hi3"
7331 [(set (match_operand:V4HI 0 "register_operand" "=y")
7332 (vec_concat:V4HI
7333 (vec_concat:V2HI
7334 (minus:HI
7335 (vec_select:HI
7336 (match_operand:V4HI 1 "register_operand" "0")
7337 (parallel [(const_int 0)]))
7338 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7339 (minus:HI
7340 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7341 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7342 (vec_concat:V2HI
7343 (minus:HI
7344 (vec_select:HI
7345 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7346 (parallel [(const_int 0)]))
7347 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7348 (minus:HI
7349 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7350 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7351 "TARGET_SSSE3"
7352 "phsubw\t{%2, %0|%0, %2}"
7353 [(set_attr "type" "sseiadd")
7354 (set_attr "atom_unit" "complex")
7355 (set_attr "prefix_extra" "1")
7356 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7357 (set_attr "mode" "DI")])
7358
;; SSSE3 horizontal subtract of 32-bit elements, 128-bit form.  Each
;; result element is element 2k minus element 2k+1 (minus:SI) of a source;
;; operand 1's two pairs form the first V2SI half of the result, operand
;; 2's the second half.
;;   0 (noavx): operand 1 tied to the destination -> phsubd
;;   1 (avx):   three-operand form                -> vphsubd
7359 (define_insn "ssse3_phsubdv4si3"
7360 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7361 (vec_concat:V4SI
7362 (vec_concat:V2SI
7363 (minus:SI
7364 (vec_select:SI
7365 (match_operand:V4SI 1 "register_operand" "0,x")
7366 (parallel [(const_int 0)]))
7367 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7368 (minus:SI
7369 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7370 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7371 (vec_concat:V2SI
7372 (minus:SI
7373 (vec_select:SI
7374 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7375 (parallel [(const_int 0)]))
7376 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7377 (minus:SI
7378 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7379 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7380 "TARGET_SSSE3"
7381 "@
7382 phsubd\t{%2, %0|%0, %2}
7383 vphsubd\t{%2, %1, %0|%0, %1, %2}"
7384
7385 [(set_attr "isa" "noavx,avx")
7386 (set_attr "type" "sseiadd")
7387 (set_attr "atom_unit" "complex")
7388 (set_attr "prefix_data16" "1,*")
7389 (set_attr "prefix_extra" "1")
7390 (set_attr "prefix" "orig,vex")
7391 (set_attr "mode" "TI")])
7392
;; SSSE3 horizontal subtract of 32-bit elements, 64-bit MMX form ("y"
;; registers).  Result element 0 is operand 1's element 0 minus its
;; element 1; result element 1 is the same difference taken from
;; operand 2.  Operand 1 is tied to the destination.  prefix_rex is
;; computed at run time via x86_extended_reg_mentioned_p.
7393 (define_insn "ssse3_phsubdv2si3"
7394 [(set (match_operand:V2SI 0 "register_operand" "=y")
7395 (vec_concat:V2SI
7396 (minus:SI
7397 (vec_select:SI
7398 (match_operand:V2SI 1 "register_operand" "0")
7399 (parallel [(const_int 0)]))
7400 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7401 (minus:SI
7402 (vec_select:SI
7403 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
7404 (parallel [(const_int 0)]))
7405 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
7406 "TARGET_SSSE3"
7407 "phsubd\t{%2, %0|%0, %2}"
7408 [(set_attr "type" "sseiadd")
7409 (set_attr "atom_unit" "complex")
7410 (set_attr "prefix_extra" "1")
7411 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7412 (set_attr "mode" "DI")])
7413
;; SSSE3 horizontal subtract of 16-bit elements with signed saturation
;; (ss_minus:HI), 128-bit form.  Each result element is element 2k minus
;; element 2k+1 of a source; the four pairs of operand 1 form the first
;; V4HI half of the result, those of operand 2 the second half.
;;   0 (noavx): operand 1 tied to the destination -> phsubsw
;;   1 (avx):   three-operand form                -> vphsubsw
7414 (define_insn "ssse3_phsubswv8hi3"
7415 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7416 (vec_concat:V8HI
;; First half: saturating pairwise differences of operand 1.
7417 (vec_concat:V4HI
7418 (vec_concat:V2HI
7419 (ss_minus:HI
7420 (vec_select:HI
7421 (match_operand:V8HI 1 "register_operand" "0,x")
7422 (parallel [(const_int 0)]))
7423 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7424 (ss_minus:HI
7425 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7426 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7427 (vec_concat:V2HI
7428 (ss_minus:HI
7429 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7430 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7431 (ss_minus:HI
7432 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7433 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
;; Second half: saturating pairwise differences of operand 2.
7434 (vec_concat:V4HI
7435 (vec_concat:V2HI
7436 (ss_minus:HI
7437 (vec_select:HI
7438 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7439 (parallel [(const_int 0)]))
7440 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7441 (ss_minus:HI
7442 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7443 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7444 (vec_concat:V2HI
7445 (ss_minus:HI
7446 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7447 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7448 (ss_minus:HI
7449 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7450 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7451 "TARGET_SSSE3"
7452 "@
7453 phsubsw\t{%2, %0|%0, %2}
7454 vphsubsw\t{%2, %1, %0|%0, %1, %2}"
7455 [(set_attr "isa" "noavx,avx")
7456 (set_attr "type" "sseiadd")
7457 (set_attr "atom_unit" "complex")
7458 (set_attr "prefix_data16" "1,*")
7459 (set_attr "prefix_extra" "1")
7460 (set_attr "prefix" "orig,vex")
7461 (set_attr "mode" "TI")])
7462
;; SSSE3 saturating horizontal subtract (ss_minus:HI) of 16-bit elements,
;; 64-bit MMX form ("y" registers).  Each result element is element 2k
;; minus element 2k+1 of a source; operand 1's two pairs form the first
;; V2HI half, operand 2's the second half.  Operand 1 is tied to the
;; destination.  prefix_rex is computed at run time via
;; x86_extended_reg_mentioned_p.
7463 (define_insn "ssse3_phsubswv4hi3"
7464 [(set (match_operand:V4HI 0 "register_operand" "=y")
7465 (vec_concat:V4HI
7466 (vec_concat:V2HI
7467 (ss_minus:HI
7468 (vec_select:HI
7469 (match_operand:V4HI 1 "register_operand" "0")
7470 (parallel [(const_int 0)]))
7471 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7472 (ss_minus:HI
7473 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7474 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7475 (vec_concat:V2HI
7476 (ss_minus:HI
7477 (vec_select:HI
7478 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7479 (parallel [(const_int 0)]))
7480 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7481 (ss_minus:HI
7482 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7483 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7484 "TARGET_SSSE3"
7485 "phsubsw\t{%2, %0|%0, %2}"
7486 [(set_attr "type" "sseiadd")
7487 (set_attr "atom_unit" "complex")
7488 (set_attr "prefix_extra" "1")
7489 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7490 (set_attr "mode" "DI")])
7491
;; Multiply-add of bytes into V8HI with signed saturation.
;; Bytes of operand 1 are zero-extended, bytes of operand 2 are
;; sign-extended.  The products of the even-indexed bytes and the
;; products of the odd-indexed bytes are combined with ss_plus.
(define_insn "ssse3_pmaddubsw128"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (ss_plus:V8HI
          ;; Even byte positions.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 1 "register_operand" "0,x")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)])))
            (sign_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)]))))
          ;; Odd byte positions.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)])))
            (sign_extend:V8HI
              (vec_select:V8QI
                (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)]))))))]
  "TARGET_SSSE3"
  "@
   pmaddubsw\t{%2, %0|%0, %2}
   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
7550
;; V8QI -> V4HI variant of the byte multiply-add, using "y" register
;; constraints.  Operand 1 bytes are zero-extended, operand 2 bytes
;; sign-extended; even and odd products are combined with ss_plus.
(define_insn "ssse3_pmaddubsw"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (ss_plus:V4HI
          ;; Even byte positions.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 1 "register_operand" "0")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)])))
            (sign_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)]))))
          ;; Odd byte positions.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4HI
              (vec_select:V4QI
                (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex")
        (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
7589
;; Expander for the V8HI rounding high multiply:
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1)
;; with both shifts written as lshiftrt.  The preparation statement
;; passes the operands through ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the vector of ones is spelled const_vector:V8HI even
;; though it is an operand of plus:V8SI; it is kept unchanged so that
;; this pattern keeps the same shape as the matching insn pattern.
(define_expand "ssse3_pmulhrswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (lshiftrt:V8SI
                (mult:V8SI
                  (sign_extend:V8SI
                    (match_operand:V8HI 1 "nonimmediate_operand" ""))
                  (sign_extend:V8SI
                    (match_operand:V8HI 2 "nonimmediate_operand" "")))
                (const_int 14))
              (const_vector:V8HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3"
  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
7609
;; Insn for the V8HI rounding high multiply:
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1).
;; Operand 1 is marked commutative ("%"); the insn condition also
;; requires ix86_binary_operator_ok for the operands.
;; NOTE(review): the vector of ones is spelled const_vector:V8HI inside
;; plus:V8SI, mirroring the expander of the same name.
(define_insn "*ssse3_pmulhrswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (lshiftrt:V8SI
                (mult:V8SI
                  (sign_extend:V8SI
                    (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
                  (sign_extend:V8SI
                    (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
                (const_int 14))
              (const_vector:V8HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "@
   pmulhrsw\t{%2, %0|%0, %2}
   vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
7637
;; Expander for the V4HI rounding high multiply:
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1).
;; The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the vector of ones is spelled const_vector:V4HI inside
;; plus:V4SI; kept unchanged to stay in step with the matching insn.
(define_expand "ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "")
        (truncate:V4HI
          (lshiftrt:V4SI
            (plus:V4SI
              (lshiftrt:V4SI
                (mult:V4SI
                  (sign_extend:V4SI
                    (match_operand:V4HI 1 "nonimmediate_operand" ""))
                  (sign_extend:V4SI
                    (match_operand:V4HI 2 "nonimmediate_operand" "")))
                (const_int 14))
              (const_vector:V4HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3"
  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
7655
;; Insn for the V4HI rounding high multiply, using "y" register
;; constraints:
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1).
;; Operand 1 is marked commutative ("%").
(define_insn "*ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (truncate:V4HI
          (lshiftrt:V4SI
            (plus:V4SI
              (lshiftrt:V4SI
                (mult:V4SI
                  (sign_extend:V4SI
                    (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
                  (sign_extend:V4SI
                    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
                (const_int 14))
              (const_vector:V4HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex")
        (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
7677
;; V16QI pshufb, represented as UNSPEC_PSHUFB of operands 1 and 2.
;; Alternative 0 ties operand 1 to the destination (isa "noavx"),
;; alternative 1 is the three-operand form (isa "avx").
(define_insn "ssse3_pshufbv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "0,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "@
   pshufb\t{%2, %0|%0, %2}
   vpshufb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
7693
;; V8QI pshufb, using "y" register constraints, represented as
;; UNSPEC_PSHUFB of operands 1 and 2.  Operand 1 is tied to the
;; destination.
(define_insn "ssse3_pshufbv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (unspec:V8QI
          [(match_operand:V8QI 1 "register_operand" "0")
           (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex")
        (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
7705
;; psign for the VI124_128 modes, represented as UNSPEC_PSIGN of
;; operands 1 and 2.  Alternative 0 ties operand 1 to the destination
;; (isa "noavx"), alternative 1 is the three-operand form (isa "avx").
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
        (unspec:VI124_128
          [(match_operand:VI124_128 1 "register_operand" "0,x")
           (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "@
   psign<ssemodesuffix>\t{%2, %0|%0, %2}
   vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
7722
;; psign for the MMXMODEI modes, using "y" register constraints,
;; represented as UNSPEC_PSIGN of operands 1 and 2.  Operand 1 is tied
;; to the destination.
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (unspec:MMXMODEI
          [(match_operand:MMXMODEI 1 "register_operand" "0")
           (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "psign<mmxvecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex")
        (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
7735
;; TImode palignr, represented as UNSPEC_PALIGNR of operands 1-3.
;; Operand 3 is divided by 8 before it is printed (presumably a bit
;; count turned into the byte count the instruction takes; see the
;; const_0_to_255_mul_8_operand predicate).
(define_insn "ssse3_palignrti"
  [(set (match_operand:TI 0 "register_operand" "=x,x")
        (unspec:TI
          [(match_operand:TI 1 "register_operand" "0,x")
           (match_operand:TI 2 "nonimmediate_operand" "xm,xm")
           (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
          UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* Rescale the immediate before emitting it.  */
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);

  /* Alternative 0: two-operand form.  */
  if (which_alternative == 0)
    return "palignr\t{%3, %2, %0|%0, %2, %3}";

  /* Alternative 1: three-operand form.  */
  if (which_alternative == 1)
    return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";

  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
7764
;; DImode palignr, using "y" register constraints, represented as
;; UNSPEC_PALIGNR of operands 1-3.  Operand 3 is divided by 8 before
;; it is printed.
(define_insn "ssse3_palignrdi"
  [(set (match_operand:DI 0 "register_operand" "=y")
        (unspec:DI
          [(match_operand:DI 1 "register_operand" "0")
           (match_operand:DI 2 "nonimmediate_operand" "ym")
           (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
          UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* Rescale the immediate before emitting it.  */
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set (attr "prefix_rex")
        (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
7782
;; Element-wise absolute value for the VI124_128 modes; the source may
;; be a register or memory.
(define_insn "abs<mode>2"
  [(set (match_operand:VI124_128 0 "register_operand" "=x")
        (abs:VI124_128
          (match_operand:VI124_128 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSSE3"
  "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7794
;; Element-wise absolute value for the MMXMODEI modes, using "y"
;; register constraints; the source may be a register or memory.
(define_insn "abs<mode>2"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (abs:MMXMODEI
          (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
  "TARGET_SSSE3"
  "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex")
        (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
7806
7807 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7808 ;;
7809 ;; AMD SSE4A instructions
7810 ;;
7811 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7812
;; Store of a MODEF register (operand 1) to memory (operand 0),
;; wrapped in UNSPEC_MOVNT.
(define_insn "sse4a_movnt<mode>"
  [(set (match_operand:MODEF 0 "memory_operand" "=m")
        (unspec:MODEF
          [(match_operand:MODEF 1 "register_operand" "x")]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<MODE>")])
7822
;; Store of element 0 of a VF_128 register (operand 1) to scalar
;; memory (operand 0), wrapped in UNSPEC_MOVNT.
(define_insn "sse4a_vmmovnt<mode>"
  [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
        (unspec:<ssescalarmode>
          [(vec_select:<ssescalarmode>
             (match_operand:VF_128 1 "register_operand" "x")
             (parallel [(const_int 0)]))]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<ssescalarmode>")])
7834
;; extrq with two constant-integer operands (2 and 3), represented as
;; UNSPEC_EXTRQI.  Operand 1 is tied to the destination;
;; length_immediate is 2.
(define_insn "sse4a_extrqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand 2 "const_int_operand" "")
           (match_operand 3 "const_int_operand" "")]
          UNSPEC_EXTRQI))]
  "TARGET_SSE4A"
  "extrq\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse")
   (set_attr "prefix_data16" "1")
   (set_attr "length_immediate" "2")
   (set_attr "mode" "TI")])
7847
;; extrq with a V16QI register as second source, represented as
;; UNSPEC_EXTRQ.  Operand 1 is tied to the destination.
(define_insn "sse4a_extrq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V16QI 2 "register_operand" "x")]
          UNSPEC_EXTRQ))]
  "TARGET_SSE4A"
  "extrq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
7858
;; insertq with two constant-integer operands (3 and 4), represented
;; as UNSPEC_INSERTQI.  Operand 1 is tied to the destination;
;; length_immediate is 2.
(define_insn "sse4a_insertqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "register_operand" "x")
           (match_operand 3 "const_int_operand" "")
           (match_operand 4 "const_int_operand" "")]
          UNSPEC_INSERTQI))]
  "TARGET_SSE4A"
  "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
  [(set_attr "type" "sseins")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "length_immediate" "2")
   (set_attr "mode" "TI")])
7873
;; insertq with register operands only, represented as UNSPEC_INSERTQ.
;; Operand 1 is tied to the destination.
(define_insn "sse4a_insertq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "register_operand" "x")]
          UNSPEC_INSERTQ))]
  "TARGET_SSE4A"
  "insertq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseins")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "TI")])
7885
7886 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7887 ;;
7888 ;; Intel SSE4.1 instructions
7889 ;;
7890 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7891
;; Immediate-controlled blend for the VF modes: a vec_merge of
;; operand 2 and operand 1 under the constant mask in operand 3.
;; Alternative 0 ties operand 1 to the destination (isa "noavx"),
;; alternative 1 is the three-operand form (isa "avx").
(define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (vec_merge:VF
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
          (match_operand:VF 1 "register_operand" "0,x")
          (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n,n")))]
  "TARGET_SSE4_1"
  "@
   blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
7909
;; Variable blend for the VF modes, represented as UNSPEC_BLENDV of
;; operands 1-3.
;; Alternative 0 (isa "noavx"): operand 1 is tied to the destination
;; and the selector, operand 3, is constrained to "Yz".
;; Alternative 1 (isa "avx"): four-operand form with the selector in
;; any "x" register.
;; Only the AVX form encodes its fourth register in an immediate
;; byte, so length_immediate is "*,1" as in sse4_1_pblendvb.
(define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
        (unspec:VF
          [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
           (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
           (match_operand:VF 3 "register_operand" "Yz,x")]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "@
   blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "length_immediate" "*,1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
7928
;; dp<suffix> for the VF modes, represented as UNSPEC_DP of operands
;; 1-3 with an immediate in operand 3.  Operand 1 is marked
;; commutative ("%").
(define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_DP))]
  "TARGET_SSE4_1"
  "@
   dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemul")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
7947
;; Load of a V2DI value from memory (operand 1) into a register,
;; wrapped in UNSPEC_MOVNTDQA.
(define_insn "sse4_1_movntdqa"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "memory_operand" "m")]
          UNSPEC_MOVNTDQA))]
  "TARGET_SSE4_1"
  "%vmovntdqa\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7958
;; mpsadbw on V16QI, represented as UNSPEC_MPSADBW of operands 1-3
;; with an immediate in operand 3.
(define_insn "sse4_1_mpsadbw"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "0,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_MPSADBW))]
  "TARGET_SSE4_1"
  "@
   mpsadbw\t{%3, %2, %0|%0, %2, %3}
   vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
7975
;; Pack two V4SI operands into one V8HI with unsigned saturation:
;; us_truncate of operand 1 forms the first half, us_truncate of
;; operand 2 the second half.
(define_insn "sse4_1_packusdw"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          (us_truncate:V4HI
            (match_operand:V4SI 1 "register_operand" "0,x"))
          (us_truncate:V4HI
            (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
  "TARGET_SSE4_1"
  "@
   packusdw\t{%2, %0|%0, %2}
   vpackusdw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
7992
;; Variable byte blend on V16QI, represented as UNSPEC_BLENDV of
;; operands 1-3.
;; Alternative 0 (isa "noavx"): operand 1 is tied to the destination
;; and the selector, operand 3, is constrained to "Yz".
;; Alternative 1 (isa "avx"): four-operand form with the selector in
;; any "x" register; only this form has an immediate byte
;; (length_immediate "*,1").
;; The destination uses the same "_maybe_avx" predicate as operand 1
;; and as the <sse4_1>_blendv pattern, so the three predicates of this
;; insn apply one consistent rule.
(define_insn "sse4_1_pblendvb"
  [(set (match_operand:V16QI 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
        (unspec:V16QI
          [(match_operand:V16QI 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
           (match_operand:V16QI 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
           (match_operand:V16QI 3 "register_operand" "Yz,x")]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "@
   pblendvb\t{%3, %2, %0|%0, %2, %3}
   vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "*,1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
8010
;; Immediate-controlled word blend on V8HI: a vec_merge of operand 2
;; and operand 1 under the constant mask in operand 3.
(define_insn "sse4_1_pblendw"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_merge:V8HI
          (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
          (match_operand:V8HI 1 "register_operand" "0,x")
          (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
  "TARGET_SSE4_1"
  "@
   pblendw\t{%3, %2, %0|%0, %2, %3}
   vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
8027
;; phminposuw on V8HI, represented as UNSPEC_PHMINPOSUW of operand 1.
(define_insn "sse4_1_phminposuw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (unspec:V8HI
          [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
          UNSPEC_PHMINPOSUW))]
  "TARGET_SSE4_1"
  "%vphminposuw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
8038
;; Sign or zero extension (any_extend) of bytes 0-7 of a V16QI operand
;; to V8HI.
(define_insn "sse4_1_<code>v8qiv8hi2"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (any_extend:V8HI
          (vec_select:V8QI
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
8058
;; Sign or zero extension (any_extend) of bytes 0-3 of a V16QI operand
;; to V4SI.
(define_insn "sse4_1_<code>v4qiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (any_extend:V4SI
          (vec_select:V4QI
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
8074
;; Sign or zero extension (any_extend) of elements 0-3 of a V8HI
;; operand to V4SI.
(define_insn "sse4_1_<code>v4hiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (any_extend:V4SI
          (vec_select:V4HI
            (match_operand:V8HI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
8090
;; Sign or zero extension (any_extend) of bytes 0-1 of a V16QI operand
;; to V2DI.
(define_insn "sse4_1_<code>v2qiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (any_extend:V2DI
          (vec_select:V2QI
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
8104
;; Sign or zero extension (any_extend) of elements 0-1 of a V8HI
;; operand to V2DI.
(define_insn "sse4_1_<code>v2hiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (any_extend:V2DI
          (vec_select:V2HI
            (match_operand:V8HI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
8118
;; Sign or zero extension (any_extend) of elements 0-1 of a V4SI
;; operand to V2DI.
(define_insn "sse4_1_<code>v2siv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (any_extend:V2DI
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
8132
;; The vtest instructions (emitted as "vtest<ssemodesuffix>") are very
;; similar to comiss and ucomiss in how they set FLAGS_REG, but they
;; are not really compare instructions.  The flags result is
;; represented as UNSPEC_VTESTP of operands 0 and 1.
(define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:VF 0 "register_operand" "x")
           (match_operand:VF 1 "nonimmediate_operand" "xm")]
          UNSPEC_VTESTP))]
  "TARGET_AVX"
  "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
8146
;; ptest is very similar to comiss and ucomiss in how it sets
;; FLAGS_REG, but it is not really a compare instruction.  This is the
;; V4DI form; the flags result is represented as UNSPEC_PTEST of
;; operands 0 and 1.
(define_insn "avx_ptest256"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V4DI 0 "register_operand" "x")
           (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
          UNSPEC_PTEST))]
  "TARGET_AVX"
  "vptest\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8160
;; V2DI ptest: sets FLAGS_REG from UNSPEC_PTEST of operands 0 and 1.
(define_insn "sse4_1_ptest"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V2DI 0 "register_operand" "x")
           (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
          UNSPEC_PTEST))]
  "TARGET_SSE4_1"
  "%vptest\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
8172
;; Packed round for the VF modes, represented as UNSPEC_ROUND of the
;; source (operand 1) and an immediate in the range accepted by
;; const_0_to_15_operand (operand 2).  prefix_data16 is "*" when
;; TARGET_AVX is nonzero and "1" otherwise.
(define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "register_operand" "=x")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand" "xm")
           (match_operand:SI 2 "const_0_to_15_operand" "n")]
          UNSPEC_ROUND))]
  "TARGET_ROUND"
  "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set (attr "prefix_data16")
        (if_then_else
          (ne (symbol_ref "TARGET_AVX") (const_int 0))
          (const_string "*")
          (const_string "1")))
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
8191
;; Scalar round for the VF_128 modes: a vec_merge with mask 1 of
;; UNSPEC_ROUND applied to operand 2 (immediate in operand 3) and
;; operand 1, so only element 0 comes from the rounded value.
(define_insn "sse4_1_round<ssescalarmodesuffix>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 2 "register_operand" "x,x")
             (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
            UNSPEC_ROUND)
          (match_operand:VF_128 1 "register_operand" "0,x")
          (const_int 1)))]
  "TARGET_ROUND"
  "@
   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
8212
8213 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8214 ;;
8215 ;; Intel SSE4.2 string/text processing instructions
8216 ;;
8217 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8218
;; Combined pcmpestr pattern with three results computed from the same
;; UNSPEC_PCMPESTR inputs: an SI value in operand 0 (constraint "c"),
;; a V16QI value in operand 1 (constraint "Yz") and FLAGS_REG.
;; It is only valid while pseudos can be created and is always split
;; ("#" template, "&& 1" split condition).  The splitter inspects the
;; REG_UNUSED notes on curr_insn and emits sse4_2_pcmpestri and/or
;; sse4_2_pcmpestrm for the register results that are used, or
;; sse4_2_pcmpestr_cconly when only the flags are used.
(define_insn_and_split "sse4_2_pcmpestr"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
           (match_operand:SI 3 "register_operand" "a,a")
           (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
           (match_operand:SI 5 "register_operand" "d,d")
           (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPESTR))
   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
        (unspec:V16QI
          [(match_dup 2) (match_dup 3) (match_dup 4)
           (match_dup 5) (match_dup 6)]
          UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 2) (match_dup 3) (match_dup 4)
           (match_dup 5) (match_dup 6)]
          UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* A result is needed when it carries no REG_UNUSED note.  */
  int need_index
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
  int need_mask
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
  int need_flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);

  if (need_index)
    emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
                                     operands[3], operands[4],
                                     operands[5], operands[6]));
  if (need_mask)
    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
                                     operands[3], operands[4],
                                     operands[5], operands[6]));
  /* Both insns above also set the flags, so the flags-only form is
     emitted only when neither of them was.  */
  if (need_flags && !need_index && !need_mask)
    emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
                                           operands[2], operands[3],
                                           operands[4], operands[5],
                                           operands[6]));
  DONE;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
8275
;; pcmpestri: SI result in operand 0 (constraint "c") plus FLAGS_REG,
;; both UNSPEC_PCMPESTR of operands 1-5.  Operands 2 and 4 are
;; constrained to "a" and "d"; only operands 1, 3 and 5 appear in the
;; output template.
(define_insn "sse4_2_pcmpestri"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:SI 2 "register_operand" "a,a")
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
           (match_operand:SI 4 "register_operand" "d,d")
           (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1) (match_dup 2) (match_dup 3)
           (match_dup 4) (match_dup 5)]
          UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2"
  "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
8302
;; pcmpestrm: V16QI result in operand 0 (constraint "Yz") plus
;; FLAGS_REG, both UNSPEC_PCMPESTR of operands 1-5.  Operands 2 and 4
;; are constrained to "a" and "d"; only operands 1, 3 and 5 appear in
;; the output template.
(define_insn "sse4_2_pcmpestrm"
  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:SI 2 "register_operand" "a,a")
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
           (match_operand:SI 4 "register_operand" "d,d")
           (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1) (match_dup 2) (match_dup 3)
           (match_dup 4) (match_dup 5)]
          UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2"
  "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
8329
;; Flags-only pcmpestr: sets FLAGS_REG from UNSPEC_PCMPESTR of
;; operands 2-6 and clobbers two scratches.  Alternatives 0-1 emit
;; pcmpestrm and need the V16QI scratch ("Yz"); alternatives 2-3 emit
;; pcmpestri and need the SI scratch ("c").  Within each pair the
;; second alternative takes operand 4 from memory.
(define_insn "sse4_2_pcmpestr_cconly"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
           (match_operand:SI 3 "register_operand" "a,a,a,a")
           (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
           (match_operand:SI 5 "register_operand" "d,d,d,d")
           (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
          UNSPEC_PCMPESTR))
   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
   (clobber (match_scratch:SI 1 "= X, X,c,c"))]
  "TARGET_SSE4_2"
  "@
   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "none,load,none,load")
   (set_attr "mode" "TI")])
8354
;; Combined pcmpistr pattern with three results computed from the same
;; UNSPEC_PCMPISTR inputs: an SI value in operand 0 (constraint "c"),
;; a V16QI value in operand 1 (constraint "Yz") and FLAGS_REG.
;; It is only valid while pseudos can be created and is always split
;; ("#" template, "&& 1" split condition).  The splitter inspects the
;; REG_UNUSED notes on curr_insn and emits sse4_2_pcmpistri and/or
;; sse4_2_pcmpistrm for the register results that are used, or
;; sse4_2_pcmpistr_cconly when only the flags are used.
(define_insn_and_split "sse4_2_pcmpistr"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
           (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
           (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
        (unspec:V16QI
          [(match_dup 2) (match_dup 3) (match_dup 4)]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 2) (match_dup 3) (match_dup 4)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* A result is needed when it carries no REG_UNUSED note.  */
  int need_index
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
  int need_mask
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
  int need_flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);

  if (need_index)
    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
                                     operands[3], operands[4]));
  if (need_mask)
    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
                                     operands[3], operands[4]));
  /* Both insns above also set the flags, so the flags-only form is
     emitted only when neither of them was.  */
  if (need_flags && !need_index && !need_mask)
    emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
                                           operands[2], operands[3],
                                           operands[4]));
  DONE;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
8402
;; pcmpistri: SI result in operand 0 (constraint "c") plus FLAGS_REG,
;; both UNSPEC_PCMPISTR of operands 1-3.
(define_insn "sse4_2_pcmpistri"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1) (match_dup 2) (match_dup 3)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
8425
;; vpcmpistrm: the V16QI result of UNSPEC_PCMPISTR goes to operand 0
;; (constraint "Yz"); the same unspec also sets FLAGS_REG.  Operand 2 may
;; be a register or memory, operand 3 is an immediate in 0..255.
(define_insn "sse4_2_pcmpistrm"
  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1) (match_dup 2) (match_dup 3)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
8448
;; Flags-only form of the implicit-length string compare.  Only FLAGS_REG
;; is set; the register result of the instruction is a clobbered scratch.
;; Alternatives 0/1 clobber the V16QI scratch ("Yz") and emit vpcmpistrm,
;; alternatives 2/3 clobber the SI scratch ("c") and emit vpcmpistri.
(define_insn "sse4_2_pcmpistr_cconly"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
           (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
          UNSPEC_PCMPISTR))
   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
   (clobber (match_scratch:SI 1 "= X, X,c,c"))]
  "TARGET_SSE4_2"
  "@
   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load,none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
8471
8472 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8473 ;;
8474 ;; XOP instructions
8475 ;;
8476 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8477
8478 ;; XOP parallel integer multiply/add instructions.
8479 ;; Note the XOP multiply/add instructions
8480 ;; a[i] = b[i] * c[i] + d[i];
8481 ;; do not allow the value being added to be a memory operation.
;; vpmacsww: V8HI multiply/add, operand 0 = operand 1 * operand 2
;; + operand 3 using a plain (non-saturating) PLUS.  Only operand 2 has a
;; memory alternative; the addend is constrained to a register.
(define_insn "xop_pmacsww"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                     (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
8493
;; vpmacssww: V8HI multiply/add where the final addition of operand 3 is
;; a signed saturating SS_PLUS.  Only operand 2 has a memory alternative.
(define_insn "xop_pmacssww"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (ss_plus:V8HI
          (mult:V8HI
            (match_operand:V8HI 1 "nonimmediate_operand" "%x")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
8504
;; vpmacsdd: V4SI multiply/add, operand 0 = operand 1 * operand 2
;; + operand 3 using a plain (non-saturating) PLUS.  Only operand 2 has a
;; memory alternative.
(define_insn "xop_pmacsdd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                     (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
8516
;; vpmacssdd: V4SI multiply/add where the final addition of operand 3 is
;; a signed saturating SS_PLUS.  Only operand 2 has a memory alternative.
(define_insn "xop_pmacssdd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (mult:V4SI
            (match_operand:V4SI 1 "nonimmediate_operand" "%x")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
8527
;; vpmacssdql: widening multiply/add with saturation.  Elements 1 and 3 of
;; the two V4SI inputs are sign-extended to V2DI, multiplied, and added to
;; operand 3 with a signed saturating SS_PLUS.
;;
;; Both multiplicands must be sign-extended: MULT:V2DI needs V2DI operands,
;; so the second VEC_SELECT is wrapped in SIGN_EXTEND just like the first
;; (and like both operands of xop_pmacssdqh).
(define_insn "xop_pmacssdql"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (ss_plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
8546
;; vpmacssdqh: widening multiply/add with saturation.  Elements 0 and 2 of
;; the two V4SI inputs are sign-extended to V2DI, multiplied, and added to
;; operand 3 with a signed saturating SS_PLUS.
(define_insn "xop_pmacssdqh"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (ss_plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 0) (const_int 2)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
8566
;; vpmacsdql: widening multiply/add.  Elements 1 and 3 of the two V4SI
;; inputs are sign-extended to V2DI, multiplied, and added to operand 3
;; with a plain (non-saturating) PLUS.
(define_insn "xop_pmacsdql"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
8586
;; XOP has no plain widening 32-bit multiply, so one is synthesized from
;; the multiply/add form: the destination is first loaded with zero and
;; then used as the addend of the multiply/add on elements 1 and 3.
;; The split runs once reload has completed.  Because the destination is
;; written (zeroed) before the inputs are read, operand 0 is marked
;; earlyclobber ("=&x") so that it cannot share a register with
;; operand 1 or operand 2.
(define_insn_and_split "xop_mulv2div2di3_low"
  [(set (match_operand:V2DI 0 "register_operand" "=&x")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "register_operand" "%x")
              (parallel [(const_int 1) (const_int 3)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
        (match_dup 3))
   (set (match_dup 0)
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 3)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_dup 2)
                (parallel [(const_int 1) (const_int 3)]))))
          (match_dup 0)))]
{
  /* Operand 3 is the zero vector stored into the destination first.  */
  operands[3] = CONST0_RTX (V2DImode);
}
  [(set_attr "type" "ssemul")
   (set_attr "mode" "TI")])
8628
;; vpmacsdqh: widening multiply/add.  Elements 0 and 2 of the two V4SI
;; inputs are sign-extended to V2DI, multiplied, and added to operand 3
;; with a plain (non-saturating) PLUS.
(define_insn "xop_pmacsdqh"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 0) (const_int 2)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
8648
;; XOP has no plain widening 32-bit multiply, so one is synthesized from
;; the multiply/add form: the destination is first loaded with zero and
;; then used as the addend of the multiply/add on elements 0 and 2.
;; The split runs once reload has completed.  Because the destination is
;; written (zeroed) before the inputs are read, operand 0 is marked
;; earlyclobber ("=&x") so that it cannot share a register with either
;; operands[1] or operands[2].
(define_insn_and_split "xop_mulv2div2di3_high"
  [(set (match_operand:V2DI 0 "register_operand" "=&x")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "register_operand" "%x")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_XOP"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
        (match_dup 3))
   (set (match_dup 0)
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_dup 1)
                (parallel [(const_int 0) (const_int 2)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_dup 2)
                (parallel [(const_int 0) (const_int 2)]))))
          (match_dup 0)))]
{
  /* Operand 3 is the zero vector stored into the destination first.  */
  operands[3] = CONST0_RTX (V2DImode);
}
  [(set_attr "type" "ssemul")
   (set_attr "mode" "TI")])
8690
;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: the odd-indexed words (1, 3, 5, 7) of the two V8HI inputs
;; are sign-extended to V4SI and multiplied; operand 3 is then added with
;; a signed saturating SS_PLUS.
(define_insn "xop_pmacsswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
8715
;; vpmacswd: the odd-indexed words (1, 3, 5, 7) of the two V8HI inputs
;; are sign-extended to V4SI and multiplied; operand 3 is then added with
;; a plain (non-saturating) PLUS.
(define_insn "xop_pmacswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
8739
;; vpmadcsswd: multiply, add adjacent products, accumulate with saturation.
;; The even-word products and the odd-word products of the two V8HI inputs
;; (each sign-extended to V4SI) are added together with PLUS, and operand 3
;; is then added with a signed saturating SS_PLUS.
(define_insn "xop_pmadcsswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (plus:V4SI
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)]))))
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 2)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
8779
;; vpmadcswd: multiply, add adjacent products, accumulate.
;; The even-word products and the odd-word products of the two V8HI inputs
;; (each sign-extended to V4SI) are added together, and operand 3 is then
;; added as well; every addition is a plain (non-saturating) PLUS.
(define_insn "xop_pmadcswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)]))))
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 2)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
8819
8820 ;; XOP parallel XMM conditional moves
;; vpcmov for every mode of the V iterator: an IF_THEN_ELSE that uses
;; operand 3 as the condition and selects between operand 1 and operand 2.
;; The two alternatives allow a memory operand either for operand 2 or for
;; the condition (operand 3), never for both.
(define_insn "xop_pcmov_<mode><avxsizesuffix>"
  [(set (match_operand:V 0 "register_operand" "=x,x")
        (if_then_else:V (match_operand:V 3 "nonimmediate_operand" "x,m")
                        (match_operand:V 1 "vector_move_operand" "x,x")
                        (match_operand:V 2 "vector_move_operand" "xm,x")))]
  "TARGET_XOP"
  "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")])
8830
8831 ;; XOP horizontal add/subtract instructions
;; vphaddbw: signed horizontal add of bytes into words.  The even-indexed
;; and odd-indexed bytes of operand 1 are each sign-extended to V8HI and
;; added, so result word i is byte 2i + byte 2i+1.
(define_insn "xop_phaddbw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_operand:V16QI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)
                         (const_int 9) (const_int 11)
                         (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphaddbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
8860
;; vphaddbd: signed horizontal add of bytes into doublewords.  Four
;; stride-4 byte selections of operand 1 are sign-extended to V4SI and
;; summed, so result element i is the sum of bytes 4i .. 4i+3.
(define_insn "xop_phaddbd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)
                           (const_int 8) (const_int 12)])))
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)
                           (const_int 9) (const_int 13)]))))
          (plus:V4SI
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)
                           (const_int 10) (const_int 14)])))
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)
                           (const_int 11) (const_int 15)]))))))]
  "TARGET_XOP"
  "vphaddbd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
8897
;; vphaddbq: signed horizontal add of bytes into quadwords.  Each result
;; quadword is the sum of eight adjacent sign-extended bytes: element 0
;; sums bytes 0..7 and element 1 sums bytes 8..15.
;;
;; Every VEC_SELECT therefore pairs byte k (feeding element 0) with byte
;; k+8 (feeding element 1), for k = 0..7.  Selecting (k, k+4) instead would
;; describe element 0 as bytes 0-3 plus 8-11, which is not what the
;; instruction computes.
(define_insn "xop_phaddbq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 8)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 9)]))))
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 2) (const_int 10)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 3) (const_int 11)])))))
          (plus:V2DI
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 4) (const_int 12)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 5) (const_int 13)]))))
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 6) (const_int 14)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 7) (const_int 15)])))))))]
  "TARGET_XOP"
  "vphaddbq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
8950
;; vphaddwd: signed horizontal add of words into doublewords.  The even-
;; and odd-indexed words of operand 1 are each sign-extended to V4SI and
;; added, so result element i is word 2i + word 2i+1.
(define_insn "xop_phaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_operand:V8HI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphaddwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
8971
;; vphaddwq: signed horizontal add of words into quadwords.  Four stride-4
;; word selections of operand 1 are sign-extended to V2DI and summed, so
;; result element i is the sum of words 4i .. 4i+3.
(define_insn "xop_phaddwq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_operand:V8HI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)])))
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)]))))
          (plus:V2DI
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)])))
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)]))))))]
  "TARGET_XOP"
  "vphaddwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
9000
;; vphadddq: signed horizontal add of doublewords into quadwords.  Elements
;; (0, 2) and (1, 3) of operand 1 are sign-extended to V2DI and added, so
;; result element i is doubleword 2i + doubleword 2i+1.
(define_insn "xop_phadddq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphadddq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
9017
;; vphaddubw: unsigned horizontal add of bytes into words.  The even- and
;; odd-indexed bytes of operand 1 are each zero-extended to V8HI and
;; added, so result word i is byte 2i + byte 2i+1.
(define_insn "xop_phaddubw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (zero_extend:V8HI
            (vec_select:V8QI
              (match_operand:V16QI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (zero_extend:V8HI
            (vec_select:V8QI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)
                         (const_int 9) (const_int 11)
                         (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphaddubw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
9046
;; vphaddubd: unsigned horizontal add of bytes into doublewords.  Four
;; stride-4 byte selections of operand 1 are zero-extended to V4SI and
;; summed, so result element i is the sum of bytes 4i .. 4i+3.
(define_insn "xop_phaddubd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)
                           (const_int 8) (const_int 12)])))
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)
                           (const_int 9) (const_int 13)]))))
          (plus:V4SI
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)
                           (const_int 10) (const_int 14)])))
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)
                           (const_int 11) (const_int 15)]))))))]
  "TARGET_XOP"
  "vphaddubd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
9083
;; vphaddubq: unsigned horizontal add of bytes into quadwords.  Each result
;; quadword is the sum of eight adjacent zero-extended bytes: element 0
;; sums bytes 0..7 and element 1 sums bytes 8..15.
;;
;; All eight terms are ZERO_EXTENDed, since this is the unsigned variant
;; and a SIGN_EXTEND on any term would describe a different value for
;; bytes >= 0x80.  Every VEC_SELECT pairs byte k (feeding element 0) with
;; byte k+8 (feeding element 1), for k = 0..7.
(define_insn "xop_phaddubq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (plus:V2DI
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 8)])))
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 9)]))))
            (plus:V2DI
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 2) (const_int 10)])))
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 3) (const_int 11)])))))
          (plus:V2DI
            (plus:V2DI
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 4) (const_int 12)])))
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 5) (const_int 13)]))))
            (plus:V2DI
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 6) (const_int 14)])))
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 7) (const_int 15)])))))))]
  "TARGET_XOP"
  "vphaddubq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
9136
;; vphadduwd: unsigned horizontal add of words into doublewords.  The even-
;; and odd-indexed words of operand 1 are each zero-extended to V4SI and
;; added, so result element i is word 2i + word 2i+1.
(define_insn "xop_phadduwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (zero_extend:V4SI
            (vec_select:V4HI
              (match_operand:V8HI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (zero_extend:V4SI
            (vec_select:V4HI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphadduwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
9157
;; vphadduwq: unsigned horizontal add of words into quadwords.  Four
;; stride-4 word selections of operand 1 are zero-extended to V2DI and
;; summed, so result element i is the sum of words 4i .. 4i+3.
(define_insn "xop_phadduwq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (zero_extend:V2DI
              (vec_select:V2HI
                (match_operand:V8HI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)])))
            (zero_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)]))))
          (plus:V2DI
            (zero_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)])))
            (zero_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)]))))))]
  "TARGET_XOP"
  "vphadduwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
9186
;; vphaddudq: unsigned horizontal add of doublewords into quadwords.
;; Elements (0, 2) and (1, 3) of operand 1 are zero-extended to V2DI and
;; added, so result element i is doubleword 2i + doubleword 2i+1.
(define_insn "xop_phaddudq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphaddudq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
9203
;; vphsubbw: signed horizontal subtract of bytes into words.  The even- and
;; odd-indexed bytes of operand 1 are each sign-extended to V8HI, and the
;; odd ones are subtracted from the even ones: word i = byte 2i - byte 2i+1.
(define_insn "xop_phsubbw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (minus:V8HI
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_operand:V16QI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)
                         (const_int 9) (const_int 11)
                         (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphsubbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
9232
;; vphsubwd: signed horizontal subtract of words into doublewords.  The
;; even- and odd-indexed words of operand 1 are each sign-extended to V4SI,
;; and the odd ones are subtracted from the even ones:
;; element i = word 2i - word 2i+1.
(define_insn "xop_phsubwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (minus:V4SI
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_operand:V8HI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphsubwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
9253
;; vphsubdq: signed horizontal subtract of doublewords into quadwords.
;; Elements (0, 2) and (1, 3) of operand 1 are sign-extended to V2DI, and
;; the second selection is subtracted from the first:
;; element i = doubleword 2i - doubleword 2i+1.
(define_insn "xop_phsubdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (minus:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphsubdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
9270
9271 ;; XOP permute instructions
;; vpperm, modelled as an opaque UNSPEC_XOP_PERMUTE of three V16QI inputs.
;; Either operand 2 or operand 3 may be in memory, but the insn condition
;; rejects the case where both are.
(define_insn "xop_pperm"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x,x")
                       (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
                       (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
                      UNSPEC_XOP_PERMUTE))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
9283
9284 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI vectors into one V4SI: the result is the concatenation of
;; operand 1 and operand 2, each truncated to V2SI.  It is emitted as a
;; vpperm; operand 3 only appears in a USE and is passed through as the
;; instruction's third source (presumably the byte selector implementing
;; the truncation -- supplied by the caller).  At most one of operands 2
;; and 3 may be in memory.
(define_insn "xop_pperm_pack_v2di_v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_concat:V4SI
          (truncate:V2SI (match_operand:V2DI 1 "register_operand" "x,x"))
          (truncate:V2SI (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
9297
;; Pack two V4SI vectors into one V8HI: the result is the concatenation of
;; operand 1 and operand 2, each truncated to V4HI.  It is emitted as a
;; vpperm; operand 3 only appears in a USE and is passed through as the
;; instruction's third source (presumably the byte selector implementing
;; the truncation -- supplied by the caller).  At most one of operands 2
;; and 3 may be in memory.
(define_insn "xop_pperm_pack_v4si_v8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          (truncate:V4HI (match_operand:V4SI 1 "register_operand" "x,x"))
          (truncate:V4HI (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
9310
;; Pack two V8HI vectors into one V16QI: the result is the concatenation of
;; operand 1 and operand 2, each truncated to V8QI.  It is emitted as a
;; vpperm; operand 3 only appears in a USE and is passed through as the
;; instruction's third source (presumably the byte selector implementing
;; the truncation -- supplied by the caller).  At most one of operands 2
;; and 3 may be in memory.
(define_insn "xop_pperm_pack_v8hi_v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
        (vec_concat:V16QI
          (truncate:V8QI (match_operand:V8HI 1 "register_operand" "x,x"))
          (truncate:V8QI (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
9323
9324 ;; XOP packed rotate instructions
;; Rotate every element of a 128-bit integer vector left by one scalar
;; count (operand 2).  When the count is an immediate accepted by
;; const_0_to_<sserotatemax>_operand, the expander's own ROTATE pattern is
;; emitted unchanged.  Otherwise the count is broadcast into a vector and
;; the per-element variable rotate xop_vrotl<mode>3 is used instead.
(define_expand "rotl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "")
        (rotate:VI_128
          (match_operand:VI_128 1 "nonimmediate_operand" "")
          (match_operand:SI 2 "general_operand")))]
  "TARGET_XOP"
{
  /* Not a usable immediate: splat the scalar count across a vector.  */
  if (!const_0_to_<sserotatemax>_operand (operands[2], SImode))
    {
      rtvec elts = rtvec_alloc (<ssescalarnum>);
      rtx init = gen_rtx_PARALLEL (<MODE>mode, elts);
      rtx counts = gen_reg_rtx (<MODE>mode);
      rtx count = operands[2];
      int lane;

      /* Bring the count into the vector's element mode first.  */
      if (GET_MODE (count) != <ssescalarmode>mode)
        {
          count = gen_reg_rtx (<ssescalarmode>mode);
          convert_move (count, operands[2], false);
        }

      /* Every lane of the initializer refers to the same count.  */
      lane = 0;
      while (lane < <ssescalarnum>)
        {
          RTVEC_ELT (elts, lane) = count;
          lane++;
        }

      emit_insn (gen_vec_init<mode> (counts, init));
      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], counts));
      DONE;
    }
})
9355
;; Rotate every element of a 128-bit integer vector right by one scalar
;; count (operand 2).  When the count is an immediate accepted by
;; const_0_to_<sserotatemax>_operand, the expander's own ROTATERT pattern
;; is emitted unchanged.  Otherwise the count is broadcast into a vector,
;; negated, and handed to xop_vrotl<mode>3, whose pattern rotates right
;; for negative counts.
(define_expand "rotr<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "")
        (rotatert:VI_128
          (match_operand:VI_128 1 "nonimmediate_operand" "")
          (match_operand:SI 2 "general_operand")))]
  "TARGET_XOP"
{
  /* Not a usable immediate: splat the scalar count across a vector.  */
  if (!const_0_to_<sserotatemax>_operand (operands[2], SImode))
    {
      rtvec elts = rtvec_alloc (<ssescalarnum>);
      rtx init = gen_rtx_PARALLEL (<MODE>mode, elts);
      rtx neg_counts = gen_reg_rtx (<MODE>mode);
      rtx counts = gen_reg_rtx (<MODE>mode);
      rtx count = operands[2];
      int lane;

      /* Bring the count into the vector's element mode first.  */
      if (GET_MODE (count) != <ssescalarmode>mode)
        {
          count = gen_reg_rtx (<ssescalarmode>mode);
          convert_move (count, operands[2], false);
        }

      /* Every lane of the initializer refers to the same count.  */
      lane = 0;
      while (lane < <ssescalarnum>)
        {
          RTVEC_ELT (elts, lane) = count;
          lane++;
        }

      emit_insn (gen_vec_init<mode> (counts, init));
      /* Negative per-element counts select the rotate-right arm.  */
      emit_insn (gen_neg<mode>2 (neg_counts, counts));
      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg_counts));
      DONE;
    }
})
9388
;; vprot with an immediate count: rotate each element of operand 1 left by
;; the constant operand 2 (range checked by
;; const_0_to_<sserotatemax>_operand).  Operand 1 may be in memory.
(define_insn "xop_rotl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
        (rotate:VI_128 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
                       (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
  "TARGET_XOP"
  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
9399
;; Rotate each element of operand 1 right by the constant operand 2.  The
;; insn is output as a left rotate (vprot) by the complementary count
;; "element width in bits - operand 2", stored in operands[3].
;;
;; The element width is taken from the scalar mode of the vector.  Using
;; <ssescalarnum> * 8 (number of elements times eight) only equals the
;; element width for V4SI; for V2DI it yields 16 and can even produce a
;; negative count.
(define_insn "xop_rotr<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
        (rotatert:VI_128
          (match_operand:VI_128 1 "nonimmediate_operand" "xm")
          (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
  "TARGET_XOP"
{
  /* rotr (x, n) == rotl (x, element bits - n).  */
  operands[3]
    = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
  return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
}
  [(set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
9413
;; Per-element variable rotate right: negate the count vector (operand 2)
;; and emit xop_vrotl<mode>3, whose pattern rotates right for negative
;; counts.
(define_expand "vrotr<mode>3"
  [(match_operand:VI_128 0 "register_operand" "")
   (match_operand:VI_128 1 "register_operand" "")
   (match_operand:VI_128 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx neg_counts = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neg<mode>2 (neg_counts, operands[2]));
  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg_counts));
  DONE;
})
9425
;; Per-element variable rotate left: forwards the three operands to
;; xop_vrotl<mode>3 unchanged.
(define_expand "vrotl<mode>3"
  [(match_operand:VI_128 0 "register_operand" "")
   (match_operand:VI_128 1 "register_operand" "")
   (match_operand:VI_128 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx rotate = gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]);

  emit_insn (rotate);
  DONE;
})
9435
;; vprot with a vector of counts.  Where operand 2 is >= 0, operand 1 is
;; rotated left by operand 2; otherwise it is rotated right by the negated
;; count.  Either operand 1 or operand 2 may be in memory, not both.
(define_insn "xop_vrotl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
        (if_then_else:VI_128
          (ge:VI_128 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
                     (const_int 0))
          (rotate:VI_128 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
                         (match_dup 2))
          (rotatert:VI_128 (match_dup 1)
                           (neg:VI_128 (match_dup 2)))))]
  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "mode" "TI")])
9454
9455 ;; XOP packed shift instructions.
9456 ;; FIXME: add V2DI back in
;; Per-element variable logical shift right: negate the count vector
;; (operand 2) and emit xop_lshl<mode>3, whose pattern shifts right
;; logically for negative counts.
(define_expand "vlshr<mode>3"
  [(match_operand:VI124_128 0 "register_operand" "")
   (match_operand:VI124_128 1 "register_operand" "")
   (match_operand:VI124_128 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx neg_counts = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neg<mode>2 (neg_counts, operands[2]));
  emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg_counts));
  DONE;
})
9468
;; Per-element variable arithmetic shift right: negate the count vector
;; (operand 2) and emit xop_ashl<mode>3, whose pattern shifts right
;; arithmetically for negative counts.
(define_expand "vashr<mode>3"
  [(match_operand:VI124_128 0 "register_operand" "")
   (match_operand:VI124_128 1 "register_operand" "")
   (match_operand:VI124_128 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx neg_counts = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neg<mode>2 (neg_counts, operands[2]));
  emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg_counts));
  DONE;
})
9480
;; Per-element variable shift left: forwards the three operands to
;; xop_ashl<mode>3 unchanged.
(define_expand "vashl<mode>3"
  [(match_operand:VI124_128 0 "register_operand" "")
   (match_operand:VI124_128 1 "register_operand" "")
   (match_operand:VI124_128 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx shift = gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]);

  emit_insn (shift);
  DONE;
})
9490
;; XOP variable vector arithmetic shift (vpsha<ssemodesuffix>).
;; The RTL is an if_then_else on "operand 2 >= 0": the "then" arm shifts
;; operand 1 left by operand 2, the "else" arm shifts operand 1 right
;; arithmetically by the negation of operand 2.
;; Either operand 1 or operand 2 may be in memory, but the insn condition
;; rejects both being memory at once.
9491 (define_insn "xop_ashl<mode>3"
9492 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
9493 (if_then_else:VI_128
9494 (ge:VI_128
9495 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
9496 (const_int 0))
9497 (ashift:VI_128
9498 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
9499 (match_dup 2))
9500 (ashiftrt:VI_128
9501 (match_dup 1)
9502 (neg:VI_128 (match_dup 2)))))]
9503 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9504 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9505 [(set_attr "type" "sseishft")
9506 (set_attr "prefix_data16" "0")
9507 (set_attr "prefix_extra" "2")
9508 (set_attr "mode" "TI")])
9509
;; XOP variable vector logical shift (vpshl<ssemodesuffix>).
;; The RTL is an if_then_else on "operand 2 >= 0": the "then" arm shifts
;; operand 1 left by operand 2, the "else" arm shifts operand 1 right
;; logically by the negation of operand 2.
;; Either operand 1 or operand 2 may be in memory, but the insn condition
;; rejects both being memory at once.
9510 (define_insn "xop_lshl<mode>3"
9511 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
9512 (if_then_else:VI_128
9513 (ge:VI_128
9514 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
9515 (const_int 0))
9516 (ashift:VI_128
9517 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
9518 (match_dup 2))
9519 (lshiftrt:VI_128
9520 (match_dup 1)
9521 (neg:VI_128 (match_dup 2)))))]
9522 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9523 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9524 [(set_attr "type" "sseishft")
9525 (set_attr "prefix_data16" "0")
9526 (set_attr "prefix_extra" "2")
9527 (set_attr "mode" "TI")])
9528
9529 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI left shift by a scalar count (operand 2, SImode, register or
;; constant).  The count is replicated into all 16 lanes of a fresh V16QI
;; pseudo via vec_initv16qi, and the shift is done with xop_ashlv16qi3.
(define_expand "ashlv16qi3"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")
   (match_operand:SI 2 "nonmemory_operand" "")]
  "TARGET_XOP"
{
  rtvec elts = rtvec_alloc (16);
  rtx init = gen_rtx_PARALLEL (V16QImode, elts);
  rtx counts = gen_reg_rtx (V16QImode);
  int lane = 16;

  /* Every lane gets the same scalar shift count.  */
  while (lane-- > 0)
    RTVEC_ELT (elts, lane) = operands[2];

  emit_insn (gen_vec_initv16qi (counts, init));
  emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], counts));
  DONE;
})
9547
;; V16QI logical shift by a scalar count (operand 2, SImode, register or
;; constant).  The count is replicated into all 16 lanes of a fresh V16QI
;; pseudo via vec_initv16qi, and the shift is done with xop_lshlv16qi3.
(define_expand "lshlv16qi3"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")
   (match_operand:SI 2 "nonmemory_operand" "")]
  "TARGET_XOP"
{
  rtvec elts = rtvec_alloc (16);
  rtx init = gen_rtx_PARALLEL (V16QImode, elts);
  rtx counts = gen_reg_rtx (V16QImode);
  int lane = 16;

  /* Every lane gets the same scalar shift count.  */
  while (lane-- > 0)
    RTVEC_ELT (elts, lane) = operands[2];

  emit_insn (gen_vec_initv16qi (counts, init));
  emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], counts));
  DONE;
})
9565
;; V16QI arithmetic right shift by a scalar count (operand 2).
;; The right shift is expressed as xop_ashlv16qi3 with negated counts:
;;  - constant count: the negated constant is broadcast directly;
;;  - register count: the count is broadcast first and the resulting
;;    vector is negated with negv16qi2.
(define_expand "ashrv16qi3"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")
   (match_operand:SI 2 "nonmemory_operand" "")]
  "TARGET_XOP"
{
  bool count_is_const = CONST_INT_P (operands[2]);
  rtvec elts = rtvec_alloc (16);
  rtx init = gen_rtx_PARALLEL (V16QImode, elts);
  rtx counts = gen_reg_rtx (V16QImode);
  rtx lane_val = operands[2];
  int lane;

  /* A constant count can be negated at expand time.  */
  if (count_is_const)
    lane_val = GEN_INT (- INTVAL (operands[2]));

  for (lane = 15; lane >= 0; lane--)
    RTVEC_ELT (elts, lane) = lane_val;

  emit_insn (gen_vec_initv16qi (counts, init));

  /* A variable count has to be negated at run time, after the broadcast.  */
  if (!count_is_const)
    {
      rtx negated = gen_reg_rtx (V16QImode);
      emit_insn (gen_negv16qi2 (negated, counts));
      counts = negated;
    }

  emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], counts));
  DONE;
})
9596
;; V2DI arithmetic right shift by a scalar count (operand 2).
;; The count is negated (at expand time when constant, otherwise with
;; negdi2 after converting to DImode if needed), duplicated into both
;; lanes with vec_initv2di, and passed to xop_ashlv2di3.
(define_expand "ashrv2di3"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V2DI 1 "register_operand" "")
   (match_operand:DI 2 "nonmemory_operand" "")]
  "TARGET_XOP"
{
  rtx count = operands[2];
  rtvec elts = rtvec_alloc (2);
  rtx init = gen_rtx_PARALLEL (V2DImode, elts);
  rtx counts = gen_reg_rtx (V2DImode);
  rtx negated;

  if (CONST_INT_P (count))
    negated = GEN_INT (- INTVAL (count));
  else
    {
      rtx di_count = count;

      if (GET_MODE (count) != DImode)
        {
          /* Bring the count into DImode before negating it.  */
          di_count = gen_reg_rtx (DImode);
          negated = gen_reg_rtx (DImode);
          convert_move (di_count, count, false);
        }
      else
        negated = gen_reg_rtx (DImode);

      emit_insn (gen_negdi2 (negated, di_count));
    }

  RTVEC_ELT (elts, 0) = negated;
  RTVEC_ELT (elts, 1) = negated;
  emit_insn (gen_vec_initv2di (counts, init));
  emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], counts));
  DONE;
})
9629
9630 ;; XOP FRCZ support
;; vfrcz<ssemodesuffix> for every mode in FMAMODE.  The operation is
;; modelled as UNSPEC_FRCZ of operand 1, which may be a register or memory.
9631 (define_insn "xop_frcz<mode>2"
9632 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
9633 (unspec:FMAMODE
9634 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
9635 UNSPEC_FRCZ))]
9636 "TARGET_XOP"
9637 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
9638 [(set_attr "type" "ssecvt1")
9639 (set_attr "mode" "<MODE>")])
9640
9641 ;; scalar insns
;; Expander for the scalar vfrcz forms.  Builds a vec_merge of the
;; UNSPEC_FRCZ result with operand 3 under mask 1; operand 3 is not
;; supplied by the caller but set here to the all-zero vector constant.
(define_expand "xop_vmfrcz<mode>2"
  [(set (match_operand:VF_128 0 "register_operand")
	(vec_merge:VF_128
	  (unspec:VF_128
	   [(match_operand:VF_128 1 "nonimmediate_operand")]
	   UNSPEC_FRCZ)
	  (match_dup 3)
	  (const_int 1)))]
  "TARGET_XOP"
  "operands[3] = CONST0_RTX (<MODE>mode);")
9654
;; Scalar vfrcz<ssescalarmodesuffix>.  Matches a vec_merge (mask 1) of the
;; UNSPEC_FRCZ of operand 1 with operand 2, where operand 2 must satisfy
;; const0_operand.  Operand 2 does not appear in the output template.
9655 (define_insn "*xop_vmfrcz_<mode>"
9656 [(set (match_operand:VF_128 0 "register_operand" "=x")
9657 (vec_merge:VF_128
9658 (unspec:VF_128
9659 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
9660 UNSPEC_FRCZ)
9661 (match_operand:VF_128 2 "const0_operand")
9662 (const_int 1)))]
9663 "TARGET_XOP"
9664 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9665 [(set_attr "type" "ssecvt1")
9666 (set_attr "mode" "<MODE>")])
9667
;; XOP vector compare (vpcom<cond><ssemodesuffix>).  Operator 1 must be
;; accepted by ix86_comparison_int_operator; it is printed into the
;; mnemonic through the %Y1 operand modifier.  Operand 2 must be a
;; register, operand 3 may be a register or memory.
9668 (define_insn "xop_maskcmp<mode>3"
9669 [(set (match_operand:VI_128 0 "register_operand" "=x")
9670 (match_operator:VI_128 1 "ix86_comparison_int_operator"
9671 [(match_operand:VI_128 2 "register_operand" "x")
9672 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
9673 "TARGET_XOP"
9674 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9675 [(set_attr "type" "sse4arg")
9676 (set_attr "prefix_data16" "0")
9677 (set_attr "prefix_rep" "0")
9678 (set_attr "prefix_extra" "2")
9679 (set_attr "length_immediate" "1")
9680 (set_attr "mode" "TI")])
9681
;; XOP unsigned vector compare (vpcom<cond>u<ssemodesuffix>).  Operator 1
;; must be accepted by ix86_comparison_uns_operator; it is printed into
;; the mnemonic through %Y1, followed by the literal "u" suffix.
9682 (define_insn "xop_maskcmp_uns<mode>3"
9683 [(set (match_operand:VI_128 0 "register_operand" "=x")
9684 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
9685 [(match_operand:VI_128 2 "register_operand" "x")
9686 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
9687 "TARGET_XOP"
9688 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9689 [(set_attr "type" "ssecmp")
9690 (set_attr "prefix_data16" "0")
9691 (set_attr "prefix_rep" "0")
9692 (set_attr "prefix_extra" "2")
9693 (set_attr "length_immediate" "1")
9694 (set_attr "mode" "TI")])
9695
9696 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
9697 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
9698 ;; the exact instruction generated for the intrinsic.
;; Same output template as xop_maskcmp_uns<mode>3, but the comparison is
;; wrapped in UNSPEC_XOP_UNSIGNED_CMP so the RTL is not a bare comparison
;; operator.
9699 (define_insn "xop_maskcmp_uns2<mode>3"
9700 [(set (match_operand:VI_128 0 "register_operand" "=x")
9701 (unspec:VI_128
9702 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
9703 [(match_operand:VI_128 2 "register_operand" "x")
9704 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
9705 UNSPEC_XOP_UNSIGNED_CMP))]
9706 "TARGET_XOP"
9707 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9708 [(set_attr "type" "ssecmp")
9709 (set_attr "prefix_data16" "0")
9710 (set_attr "prefix_extra" "2")
9711 (set_attr "length_immediate" "1")
9712 (set_attr "mode" "TI")])
9713
9714 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
9715 ;; being added here to be complete.
;; vpcomtrue / vpcomfalse.  Operand 3 is a constant selector: zero emits
;; vpcomfalse, any non-zero value emits vpcomtrue.
(define_insn "xop_pcom_tf<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
	(unspec:VI_128
	  [(match_operand:VI_128 1 "register_operand" "x")
	   (match_operand:VI_128 2 "nonimmediate_operand" "xm")
	   (match_operand:SI 3 "const_int_operand" "n")]
	  UNSPEC_XOP_TRUEFALSE))]
  "TARGET_XOP"
{
  if (INTVAL (operands[3]) == 0)
    return "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";

  return "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
9734
;; XOP two-source permute (vpermil2<ssemodesuffix>), modelled as
;; UNSPEC_VPERMIL2 of:
;;   operand 1, 2 - the two floating-point source vectors,
;;   operand 3    - the integer selector vector (register or memory),
;;   operand 4    - an immediate in the range 0..3.
;; Operand 2 deliberately has no "%" constraint modifier: "%" would declare
;; operands 2 and 3 interchangeable, but they have different modes and
;; different roles (data source vs. selector), so swapping them would
;; change the result of the instruction.
(define_insn "xop_vpermil2<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x")
	(unspec:VF
	  [(match_operand:VF 1 "register_operand" "x")
	   (match_operand:VF 2 "nonimmediate_operand" "x")
	   (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
	   (match_operand:SI 4 "const_0_to_3_operand" "n")]
	  UNSPEC_VPERMIL2))]
  "TARGET_XOP"
  "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
  [(set_attr "type" "sse4arg")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<MODE>")])
9748
9749 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9750
;; AES encryption round, modelled as UNSPEC_AESENC of operands 1 and 2.
;; Alternative 0 (isa noavx) is the two-operand form: operand 1 is tied to
;; the destination by the "0" constraint.  Alternative 1 (isa avx) is the
;; three-operand VEX form.
9751 (define_insn "aesenc"
9752 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9753 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9754 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9755 UNSPEC_AESENC))]
9756 "TARGET_AES"
9757 "@
9758 aesenc\t{%2, %0|%0, %2}
9759 vaesenc\t{%2, %1, %0|%0, %1, %2}"
9760 [(set_attr "isa" "noavx,avx")
9761 (set_attr "type" "sselog1")
9762 (set_attr "prefix_extra" "1")
9763 (set_attr "prefix" "orig,vex")
9764 (set_attr "mode" "TI")])
9765
;; Last AES encryption round, modelled as UNSPEC_AESENCLAST of operands 1
;; and 2.  Alternative 0 (isa noavx) ties operand 1 to the destination;
;; alternative 1 (isa avx) is the three-operand VEX form.
9766 (define_insn "aesenclast"
9767 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9768 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9769 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9770 UNSPEC_AESENCLAST))]
9771 "TARGET_AES"
9772 "@
9773 aesenclast\t{%2, %0|%0, %2}
9774 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
9775 [(set_attr "isa" "noavx,avx")
9776 (set_attr "type" "sselog1")
9777 (set_attr "prefix_extra" "1")
9778 (set_attr "prefix" "orig,vex")
9779 (set_attr "mode" "TI")])
9780
;; AES decryption round, modelled as UNSPEC_AESDEC of operands 1 and 2.
;; Alternative 0 (isa noavx) ties operand 1 to the destination;
;; alternative 1 (isa avx) is the three-operand VEX form.
9781 (define_insn "aesdec"
9782 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9783 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9784 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9785 UNSPEC_AESDEC))]
9786 "TARGET_AES"
9787 "@
9788 aesdec\t{%2, %0|%0, %2}
9789 vaesdec\t{%2, %1, %0|%0, %1, %2}"
9790 [(set_attr "isa" "noavx,avx")
9791 (set_attr "type" "sselog1")
9792 (set_attr "prefix_extra" "1")
9793 (set_attr "prefix" "orig,vex")
9794 (set_attr "mode" "TI")])
9795
;; Last AES decryption round, modelled as UNSPEC_AESDECLAST of operands 1
;; and 2.  Alternative 0 (isa noavx) ties operand 1 to the destination;
;; alternative 1 (isa avx) is the three-operand VEX form.
9796 (define_insn "aesdeclast"
9797 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9798 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9799 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9800 UNSPEC_AESDECLAST))]
9801 "TARGET_AES"
9802 "@
9803 aesdeclast\t{%2, %0|%0, %2}
9804 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
9805 [(set_attr "isa" "noavx,avx")
9806 (set_attr "type" "sselog1")
9807 (set_attr "prefix_extra" "1")
9808 (set_attr "prefix" "orig,vex")
9809 (set_attr "mode" "TI")])
9810
;; aesimc, modelled as UNSPEC_AESIMC of operand 1 (register or memory).
;; A single alternative serves both encodings: the template starts with
;; %v and the prefix attribute is maybe_vex.
9811 (define_insn "aesimc"
9812 [(set (match_operand:V2DI 0 "register_operand" "=x")
9813 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9814 UNSPEC_AESIMC))]
9815 "TARGET_AES"
9816 "%vaesimc\t{%1, %0|%0, %1}"
9817 [(set_attr "type" "sselog1")
9818 (set_attr "prefix_extra" "1")
9819 (set_attr "prefix" "maybe_vex")
9820 (set_attr "mode" "TI")])
9821
;; aeskeygenassist, modelled as UNSPEC_AESKEYGENASSIST of operand 1
;; (register or memory) and operand 2, an immediate in the range 0..255.
;; A single alternative serves both encodings (%v template prefix,
;; prefix attribute maybe_vex).
9822 (define_insn "aeskeygenassist"
9823 [(set (match_operand:V2DI 0 "register_operand" "=x")
9824 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
9825 (match_operand:SI 2 "const_0_to_255_operand" "n")]
9826 UNSPEC_AESKEYGENASSIST))]
9827 "TARGET_AES"
9828 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
9829 [(set_attr "type" "sselog1")
9830 (set_attr "prefix_extra" "1")
9831 (set_attr "length_immediate" "1")
9832 (set_attr "prefix" "maybe_vex")
9833 (set_attr "mode" "TI")])
9834
;; pclmulqdq, modelled as UNSPEC_PCLMUL of operands 1 and 2 plus
;; operand 3, an immediate in the range 0..255.
;; Alternative 0 (isa noavx) ties operand 1 to the destination;
;; alternative 1 (isa avx) is the VEX form with a separate destination.
9835 (define_insn "pclmulqdq"
9836 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9837 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9838 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
9839 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9840 UNSPEC_PCLMUL))]
9841 "TARGET_PCLMUL"
9842 "@
9843 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
9844 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9845 [(set_attr "isa" "noavx,avx")
9846 (set_attr "type" "sselog1")
9847 (set_attr "prefix_extra" "1")
9848 (set_attr "length_immediate" "1")
9849 (set_attr "prefix" "orig,vex")
9850 (set_attr "mode" "TI")])
9851
;; Build the PARALLEL that represents vzeroall: element 0 is the
;; UNSPECV_VZEROALL unspec_volatile, followed by one SET to zero (in
;; V8SImode) for each SSE register -- 16 of them when TARGET_64BIT,
;; otherwise 8.
(define_expand "avx_vzeroall"
  [(match_par_dup 0 [(const_int 0)])]
  "TARGET_AVX"
{
  int nregs = TARGET_64BIT ? 16 : 8;
  rtvec elts = rtvec_alloc (nregs + 1);
  int slot;

  RTVEC_ELT (elts, 0)
    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
			       UNSPECV_VZEROALL);

  /* Slot N (1-based) clears SSE register N - 1.  */
  for (slot = 1; slot <= nregs; slot++)
    RTVEC_ELT (elts, slot)
      = gen_rtx_SET (VOIDmode,
		     gen_rtx_REG (V8SImode, SSE_REGNO (slot - 1)),
		     CONST0_RTX (V8SImode));

  operands[0] = gen_rtx_PARALLEL (VOIDmode, elts);
})
9871
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile, and emits a
;; single vzeroall instruction.
9872 (define_insn "*avx_vzeroall"
9873 [(match_parallel 0 "vzeroall_operation"
9874 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
9875 "TARGET_AVX"
9876 "vzeroall"
9877 [(set_attr "type" "sse")
9878 (set_attr "modrm" "0")
9879 (set_attr "memory" "none")
9880 (set_attr "prefix" "vex")
9881 (set_attr "mode" "OI")])
9882
9883 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
9884 ;; if the upper 128bits are unused.
;; Operand 0 is a const_int carried inside the UNSPECV_VZEROUPPER
;; unspec_volatile; it is not referenced by the output template.
9885 (define_insn "avx_vzeroupper"
9886 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
9887 UNSPECV_VZEROUPPER)]
9888 "TARGET_AVX"
9889 "vzeroupper"
9890 [(set_attr "type" "sse")
9891 (set_attr "modrm" "0")
9892 (set_attr "memory" "none")
9893 (set_attr "prefix" "vex")
9894 (set_attr "mode" "OI")])
9895
;; Broadcast a scalar into every element of a 256-bit vector.
;; Alternative 0 (memory source) emits vbroadcast directly.
;; Alternative 1 (register source, disparaged with "?") has the output
;; template "#", i.e. it is not emitted as-is and relies on being split.
9896 (define_insn "vec_dup<mode>"
9897 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
9898 (vec_duplicate:AVX256MODE24P
9899 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
9900 "TARGET_AVX"
9901 "@
9902 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
9903 #"
9904 [(set_attr "type" "ssemov")
9905 (set_attr "prefix_extra" "1")
9906 (set_attr "prefix" "vex")
9907 (set_attr "mode" "V8SF")])
9908
;; After reload, split a 256-bit vec_duplicate of a register scalar into
;; two steps: duplicate the scalar into operand 2, then concatenate
;; operand 2 with itself into the full destination.  Operand 2 is the
;; half-width-mode register with the same register number as operand 0.
9909 (define_split
9910 [(set (match_operand:AVX256MODE24P 0 "register_operand" "")
9911 (vec_duplicate:AVX256MODE24P
9912 (match_operand:<ssescalarmode> 1 "register_operand" "")))]
9913 "TARGET_AVX && reload_completed"
9914 [(set (match_dup 2) (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
9915 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
9916 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
9917
;; Concatenate a half-width vector (operand 1) with itself to fill a
;; 256-bit destination.  Three alternatives:
;;   0: source in memory            -> vbroadcastf128
;;   1: source tied to destination  -> vinsertf128 with immediate 1
;;   2: source in another register  -> vperm2f128 with immediate 0
;;      (disparaged with "?")
9918 (define_insn "avx_vbroadcastf128_<mode>"
9919 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
9920 (vec_concat:V_256
9921 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
9922 (match_dup 1)))]
9923 "TARGET_AVX"
9924 "@
9925 vbroadcastf128\t{%1, %0|%0, %1}
9926 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
9927 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
9928 [(set_attr "type" "ssemov,sselog1,sselog1")
9929 (set_attr "prefix_extra" "1")
9930 (set_attr "length_immediate" "0,1,1")
9931 (set_attr "prefix" "vex")
9932 (set_attr "mode" "V4SF,V8SF,V8SF")])
9933
9934 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
9935 ;; If it so happens that the input is in memory, use vbroadcast.
9936 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast expressed as a vec_select whose selector satisfies
;; avx_vbroadcast_operand; operand 3 is the selected element index.
;; Memory sources (alternatives 0 and 1) are re-addressed to the chosen
;; SFmode element and loaded with vbroadcastss; a register source
;; (alternative 2) uses vpermilps with the index replicated into all four
;; 2-bit fields of the immediate (index * 0x55).
(define_insn "*avx_vperm_broadcast_v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
	(vec_select:V4SF
	  (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
	  (match_parallel 2 "avx_vbroadcast_operand"
	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
{
  int elt = INTVAL (operands[3]);

  if (which_alternative == 0 || which_alternative == 1)
    {
      /* Point the memory operand at the element being broadcast.  */
      operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
      return "vbroadcastss\t{%1, %0|%0, %1}";
    }

  if (which_alternative == 2)
    {
      operands[2] = GEN_INT (elt * 0x55);
      return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
    }

  gcc_unreachable ();
}
  [(set_attr "type" "ssemov,ssemov,sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "0,0,1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "SF,SF,V4SF")])
9964
;; 256-bit float broadcast expressed as a vec_select; always split after
;; reload.  A memory source is re-addressed to the selected scalar and the
;; replacement vec_duplicate pattern is emitted.  A register source is
;; handled with two shuffles: an in-lane permute that replicates the
;; element within its 128-bit lane, then vperm2f128 to copy that lane into
;; both halves of the destination.
(define_insn_and_split "*avx_vperm_broadcast_<mode>"
  [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
	(vec_select:VF_256
	  (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
	  (match_parallel 2 "avx_vbroadcast_operand"
	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  int elt = INTVAL (operands[3]);

  if (!REG_P (src))
    /* Memory source: narrow the address to the one scalar we need and
       fall through to the vec_duplicate replacement pattern.  */
    operands[1] = adjust_address_nv (src, <ssescalarmode>mode,
				     elt * GET_MODE_SIZE (<ssescalarmode>mode));
  else
    {
      int permil_imm, perm2_imm;

      /* Replicate the wanted element across its 128-bit lane.  The other
	 lane is shuffled the same way, which is harmless.  */
      if (<MODE>mode == V4DFmode)
	permil_imm = (elt & 1) ? 15 : 0;
      else
	permil_imm = (elt & 3) * 0x55;
      emit_insn (gen_avx_vpermil<mode> (dest, src, GEN_INT (permil_imm)));

      /* Copy the lane holding the element into both lanes of DEST.  */
      perm2_imm = (elt / (<ssescalarnum> / 2)) * 0x11;
      emit_insn (gen_avx_vperm2f128<mode>3 (dest, dest, dest,
					    GEN_INT (perm2_imm)));
      DONE;
    }
})
10000
;; Expand an immediate in-lane permute for the DFmode vector modes into a
;; vec_select.  Operand 2 arrives as an 8-bit immediate and is replaced by
;; a PARALLEL of element indices: bit I of the immediate picks the low or
;; high element of the 128-bit lane that holds result element I.
(define_expand "avx_vpermil<mode>"
  [(set (match_operand:VF2 0 "register_operand" "")
	(vec_select:VF2
	  (match_operand:VF2 1 "nonimmediate_operand" "")
	  (match_operand:SI 2 "const_0_to_255_operand" "")))]
  "TARGET_AVX"
{
  int imm = INTVAL (operands[2]);
  rtx sel[<ssescalarnum>];
  int i;

  /* Low lane: indices 0..1.  */
  for (i = 0; i < 2; i++)
    sel[i] = GEN_INT ((imm >> i) & 1);

  /* High lane (256-bit mode only): indices 2..3.  */
  if (<MODE>mode == V4DFmode)
    for (i = 2; i < 4; i++)
      sel[i] = GEN_INT (((imm >> i) & 1) + 2);

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, sel));
})
10022
;; Expand an immediate in-lane permute for the SFmode vector modes into a
;; vec_select.  Operand 2 arrives as an 8-bit immediate holding four 2-bit
;; element selectors and is replaced by a PARALLEL of element indices.
;; For V8SF the same four selectors are reused for the high lane, offset
;; by 4.
(define_expand "avx_vpermil<mode>"
  [(set (match_operand:VF1 0 "register_operand" "")
	(vec_select:VF1
	  (match_operand:VF1 1 "nonimmediate_operand" "")
	  (match_operand:SI 2 "const_0_to_255_operand" "")))]
  "TARGET_AVX"
{
  int imm = INTVAL (operands[2]);
  rtx sel[<ssescalarnum>];
  int i;

  /* Low lane: indices 0..3.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT ((imm >> (2 * i)) & 3);

  /* High lane (256-bit mode only): same selectors, indices 4..7.  */
  if (<MODE>mode == V8SFmode)
    for (i = 0; i < 4; i++)
      sel[i + 4] = GEN_INT (((imm >> (2 * i)) & 3) + 4);

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, sel));
})
10048
;; Immediate form of vpermil<ssemodesuffix>.  Matches a vec_select whose
;; selector PARALLEL is accepted by avx_vpermilp_parallel (non-zero
;; return); the instruction immediate is that return value minus one.
(define_insn "*avx_vpermilp<mode>"
  [(set (match_operand:VF 0 "register_operand" "=x")
	(vec_select:VF
	  (match_operand:VF 1 "nonimmediate_operand" "xm")
	  (match_parallel 2 ""
	    [(match_operand 3 "const_int_operand" "")])))]
  "TARGET_AVX
   && avx_vpermilp_parallel (operands[2], <MODE>mode)"
{
  /* Replace the selector PARALLEL with the encoded immediate.  */
  operands[2]
    = GEN_INT (avx_vpermilp_parallel (operands[2], <MODE>mode) - 1);
  return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
10067
;; Variable form of vpermil<ssemodesuffix>: the selector is an integer
;; vector (operand 2, register or memory) rather than an immediate.
;; Modelled as UNSPEC_VPERMIL of operands 1 and 2.
10068 (define_insn "avx_vpermilvar<mode>3"
10069 [(set (match_operand:VF 0 "register_operand" "=x")
10070 (unspec:VF
10071 [(match_operand:VF 1 "register_operand" "x")
10072 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
10073 UNSPEC_VPERMIL))]
10074 "TARGET_AVX"
10075 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10076 [(set_attr "type" "sselog")
10077 (set_attr "prefix_extra" "1")
10078 (set_attr "prefix" "vex")
10079 (set_attr "mode" "<MODE>")])
10080
;; Expander for vperm2f128.  When neither bit 3 nor bit 7 of the immediate
;; is set (mask & 0x88 == 0) the operation is emitted as a vec_select from
;; the vec_concat of operands 1 and 2: immediate bits 0-1 choose the
;; 128-bit half that becomes the low half of the result, bits 4-5 the one
;; that becomes the high half.  Otherwise the UNSPEC_VPERMIL2F128 pattern
;; in the template is emitted unchanged.
(define_expand "avx_vperm2f128<mode>3"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
	(unspec:AVX256MODE2P
	  [(match_operand:AVX256MODE2P 1 "register_operand" "")
	   (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
	   (match_operand:SI 3 "const_0_to_255_operand" "")]
	  UNSPEC_VPERMIL2F128))]
  "TARGET_AVX"
{
  int imm = INTVAL (operands[3]);

  if ((imm & 0x88) == 0)
    {
      const int nelt = <ssescalarnum>;
      const int half = nelt / 2;
      const int lo_base = (imm & 3) * half;
      const int hi_base = ((imm >> 4) & 3) * half;
      rtx sel[<ssescalarnum>];
      rtx both, selector, pat;
      int i;

      /* Element indices into the double-width concatenation.  */
      for (i = 0; i < half; ++i)
	{
	  sel[i] = GEN_INT (lo_base + i);
	  sel[i + half] = GEN_INT (hi_base + i);
	}

      both = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
				 operands[1], operands[2]);
      selector = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, sel));
      pat = gen_rtx_VEC_SELECT (<MODE>mode, both, selector);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
      DONE;
    }
})
10113
10114 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
10115 ;; means that in order to represent this properly in rtl we'd have to
10116 ;; nest *another* vec_concat with a zero operand and do the select from
10117 ;; a 4x wide vector. That doesn't seem very nice.
;; vperm2f128 with an arbitrary 8-bit immediate (operand 3), kept opaque
;; as UNSPEC_VPERMIL2F128 of operands 1, 2 and 3.
10118 (define_insn "*avx_vperm2f128<mode>_full"
10119 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
10120 (unspec:AVX256MODE2P
10121 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
10122 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
10123 (match_operand:SI 3 "const_0_to_255_operand" "n")]
10124 UNSPEC_VPERMIL2F128))]
10125 "TARGET_AVX"
10126 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10127 [(set_attr "type" "sselog")
10128 (set_attr "prefix_extra" "1")
10129 (set_attr "length_immediate" "1")
10130 (set_attr "prefix" "vex")
10131 (set_attr "mode" "V8SF")])
10132
;; vperm2f128 expressed as a vec_select from the vec_concat of operands 1
;; and 2.  The selector PARALLEL must be accepted by
;; avx_vperm2f128_parallel (non-zero return); the instruction immediate is
;; that return value minus one.
(define_insn "*avx_vperm2f128<mode>_nozero"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
	(vec_select:AVX256MODE2P
	  (vec_concat:<ssedoublevecmode>
	    (match_operand:AVX256MODE2P 1 "register_operand" "x")
	    (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
	  (match_parallel 3 ""
	    [(match_operand 4 "const_int_operand" "")])))]
  "TARGET_AVX
   && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
{
  /* Replace the selector PARALLEL with the encoded immediate.  */
  operands[3]
    = GEN_INT (avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1);
  return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
10153
;; Insert the half-width vector in operand 2 into operand 1, writing the
;; result to operand 0.  Operand 3 (0 or 1) selects which half is
;; replaced: 0 dispatches to vec_set_lo_<mode>, 1 to vec_set_hi_<mode>.
(define_expand "avx_vinsertf128<mode>"
  [(match_operand:V_256 0 "register_operand" "")
   (match_operand:V_256 1 "register_operand" "")
   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_0_to_1_operand" "")]
  "TARGET_AVX"
{
  rtx (*gen_set_half) (rtx, rtx, rtx);
  HOST_WIDE_INT which_half = INTVAL (operands[3]);

  if (which_half == 0)
    gen_set_half = gen_vec_set_lo_<mode>;
  else if (which_half == 1)
    gen_set_half = gen_vec_set_hi_<mode>;
  else
    gcc_unreachable ();

  emit_insn (gen_set_half (operands[0], operands[1], operands[2]));
  DONE;
})
10178
;; Replace the low half of a 4-element 256-bit vector: the result is
;; operand 2 concatenated with elements 2..3 of operand 1.
;; Emitted as vinsertf128 with immediate 0.
10179 (define_insn "vec_set_lo_<mode>"
10180 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
10181 (vec_concat:VI8F_256
10182 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
10183 (vec_select:<ssehalfvecmode>
10184 (match_operand:VI8F_256 1 "register_operand" "x")
10185 (parallel [(const_int 2) (const_int 3)]))))]
10186 "TARGET_AVX"
10187 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10188 [(set_attr "type" "sselog")
10189 (set_attr "prefix_extra" "1")
10190 (set_attr "length_immediate" "1")
10191 (set_attr "prefix" "vex")
10192 (set_attr "mode" "V8SF")])
10193
;; Replace the high half of a 4-element 256-bit vector: the result is
;; elements 0..1 of operand 1 concatenated with operand 2.
;; Emitted as vinsertf128 with immediate 1.
10194 (define_insn "vec_set_hi_<mode>"
10195 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
10196 (vec_concat:VI8F_256
10197 (vec_select:<ssehalfvecmode>
10198 (match_operand:VI8F_256 1 "register_operand" "x")
10199 (parallel [(const_int 0) (const_int 1)]))
10200 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
10201 "TARGET_AVX"
10202 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10203 [(set_attr "type" "sselog")
10204 (set_attr "prefix_extra" "1")
10205 (set_attr "length_immediate" "1")
10206 (set_attr "prefix" "vex")
10207 (set_attr "mode" "V8SF")])
10208
;; Replace the low half of an 8-element 256-bit vector: the result is
;; operand 2 concatenated with elements 4..7 of operand 1.
;; Emitted as vinsertf128 with immediate 0.
10209 (define_insn "vec_set_lo_<mode>"
10210 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
10211 (vec_concat:VI4F_256
10212 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
10213 (vec_select:<ssehalfvecmode>
10214 (match_operand:VI4F_256 1 "register_operand" "x")
10215 (parallel [(const_int 4) (const_int 5)
10216 (const_int 6) (const_int 7)]))))]
10217 "TARGET_AVX"
10218 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10219 [(set_attr "type" "sselog")
10220 (set_attr "prefix_extra" "1")
10221 (set_attr "length_immediate" "1")
10222 (set_attr "prefix" "vex")
10223 (set_attr "mode" "V8SF")])
10224
;; Replace the high half of an 8-element 256-bit vector: the result is
;; elements 0..3 of operand 1 concatenated with operand 2.
;; Emitted as vinsertf128 with immediate 1.
10225 (define_insn "vec_set_hi_<mode>"
10226 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
10227 (vec_concat:VI4F_256
10228 (vec_select:<ssehalfvecmode>
10229 (match_operand:VI4F_256 1 "register_operand" "x")
10230 (parallel [(const_int 0) (const_int 1)
10231 (const_int 2) (const_int 3)]))
10232 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
10233 "TARGET_AVX"
10234 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10235 [(set_attr "type" "sselog")
10236 (set_attr "prefix_extra" "1")
10237 (set_attr "length_immediate" "1")
10238 (set_attr "prefix" "vex")
10239 (set_attr "mode" "V8SF")])
10240
;; Replace the low half of a V16HI vector: the result is operand 2 (V8HI)
;; concatenated with elements 8..15 of operand 1.
;; Emitted as vinsertf128 with immediate 0.
10241 (define_insn "vec_set_lo_v16hi"
10242 [(set (match_operand:V16HI 0 "register_operand" "=x")
10243 (vec_concat:V16HI
10244 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10245 (vec_select:V8HI
10246 (match_operand:V16HI 1 "register_operand" "x")
10247 (parallel [(const_int 8) (const_int 9)
10248 (const_int 10) (const_int 11)
10249 (const_int 12) (const_int 13)
10250 (const_int 14) (const_int 15)]))))]
10251 "TARGET_AVX"
10252 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10253 [(set_attr "type" "sselog")
10254 (set_attr "prefix_extra" "1")
10255 (set_attr "length_immediate" "1")
10256 (set_attr "prefix" "vex")
10257 (set_attr "mode" "V8SF")])
10258
;; Replace the high half of a V16HI vector: the result is elements 0..7
;; of operand 1 concatenated with operand 2 (V8HI).
;; Emitted as vinsertf128 with immediate 1.
10259 (define_insn "vec_set_hi_v16hi"
10260 [(set (match_operand:V16HI 0 "register_operand" "=x")
10261 (vec_concat:V16HI
10262 (vec_select:V8HI
10263 (match_operand:V16HI 1 "register_operand" "x")
10264 (parallel [(const_int 0) (const_int 1)
10265 (const_int 2) (const_int 3)
10266 (const_int 4) (const_int 5)
10267 (const_int 6) (const_int 7)]))
10268 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
10269 "TARGET_AVX"
10270 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10271 [(set_attr "type" "sselog")
10272 (set_attr "prefix_extra" "1")
10273 (set_attr "length_immediate" "1")
10274 (set_attr "prefix" "vex")
10275 (set_attr "mode" "V8SF")])
10276
;; Replace the low half of a V32QI vector: the result is operand 2 (V16QI)
;; concatenated with elements 16..31 of operand 1.
;; Emitted as vinsertf128 with immediate 0.
10277 (define_insn "vec_set_lo_v32qi"
10278 [(set (match_operand:V32QI 0 "register_operand" "=x")
10279 (vec_concat:V32QI
10280 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
10281 (vec_select:V16QI
10282 (match_operand:V32QI 1 "register_operand" "x")
10283 (parallel [(const_int 16) (const_int 17)
10284 (const_int 18) (const_int 19)
10285 (const_int 20) (const_int 21)
10286 (const_int 22) (const_int 23)
10287 (const_int 24) (const_int 25)
10288 (const_int 26) (const_int 27)
10289 (const_int 28) (const_int 29)
10290 (const_int 30) (const_int 31)]))))]
10291 "TARGET_AVX"
10292 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10293 [(set_attr "type" "sselog")
10294 (set_attr "prefix_extra" "1")
10295 (set_attr "length_immediate" "1")
10296 (set_attr "prefix" "vex")
10297 (set_attr "mode" "V8SF")])
10298
;; Replace the high half of a V32QI vector: the result is elements 0..15
;; of operand 1 concatenated with operand 2 (V16QI).
;; Emitted as vinsertf128 with immediate 1.
10299 (define_insn "vec_set_hi_v32qi"
10300 [(set (match_operand:V32QI 0 "register_operand" "=x")
10301 (vec_concat:V32QI
10302 (vec_select:V16QI
10303 (match_operand:V32QI 1 "register_operand" "x")
10304 (parallel [(const_int 0) (const_int 1)
10305 (const_int 2) (const_int 3)
10306 (const_int 4) (const_int 5)
10307 (const_int 6) (const_int 7)
10308 (const_int 8) (const_int 9)
10309 (const_int 10) (const_int 11)
10310 (const_int 12) (const_int 13)
10311 (const_int 14) (const_int 15)]))
10312 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
10313 "TARGET_AVX"
10314 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10315 [(set_attr "type" "sselog")
10316 (set_attr "prefix_extra" "1")
10317 (set_attr "length_immediate" "1")
10318 (set_attr "prefix" "vex")
10319 (set_attr "mode" "V8SF")])
10320
;; Masked load expander.  Operand 0 is the register destination, operand 1
;; the memory source and operand 2 the integer-vector mask.  Inside the
;; UNSPEC_MASKMOV the order is (mask, source, destination), with the
;; destination repeated as an input via match_dup.
10321 (define_expand "avx_maskload<ssemodesuffix><avxsizesuffix>"
10322 [(set (match_operand:VF 0 "register_operand" "")
10323 (unspec:VF
10324 [(match_operand:<sseintvecmode> 2 "register_operand" "")
10325 (match_operand:VF 1 "memory_operand" "")
10326 (match_dup 0)]
10327 UNSPEC_MASKMOV))]
10328 "TARGET_AVX")
10329
;; Masked store expander.  Operand 0 is the memory destination, operand 1
;; the integer-vector mask and operand 2 the register source.  The
;; destination is repeated as an input of the UNSPEC_MASKMOV via
;; match_dup.
10330 (define_expand "avx_maskstore<ssemodesuffix><avxsizesuffix>"
10331 [(set (match_operand:VF 0 "memory_operand" "")
10332 (unspec:VF
10333 [(match_operand:<sseintvecmode> 1 "register_operand" "")
10334 (match_operand:VF 2 "register_operand" "")
10335 (match_dup 0)]
10336 UNSPEC_MASKMOV))]
10337 "TARGET_AVX")
10338
;; vmaskmov in both directions.  Alternative 0 is a load (register
;; destination, memory source); alternative 1 is a store (memory
;; destination, register source).  The insn condition requires that the
;; destination is a register exactly when operand 2 is memory, so
;; register-to-register and memory-to-memory forms are rejected.
10339 (define_insn "*avx_maskmov<ssemodesuffix><avxsizesuffix>"
10340 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
10341 (unspec:VF
10342 [(match_operand:<sseintvecmode> 1 "register_operand" "x,x")
10343 (match_operand:VF 2 "nonimmediate_operand" "m,x")
10344 (match_dup 0)]
10345 UNSPEC_MASKMOV))]
10346 "TARGET_AVX
10347 && (REG_P (operands[0]) == MEM_P (operands[2]))"
10348 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10349 [(set_attr "type" "sselog1")
10350 (set_attr "prefix_extra" "1")
10351 (set_attr "prefix" "vex")
10352 (set_attr "mode" "<MODE>")])
10353
;; Cast from the half-width vector mode to the 256-bit mode
;; (UNSPEC_CAST).  Never emitted as an instruction of its own: after
;; reload it is split into a plain move, with the source reinterpreted in
;; the 256-bit mode -- by register number for a register source, through
;; gen_lowpart otherwise.
(define_insn_and_split "avx_<ssemodesuffix><avxsizesuffix>_<ssemodesuffix>"
  [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
	(unspec:AVX256MODE2P
	  [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
	  UNSPEC_CAST))]
  "TARGET_AVX"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx src = operands[1];
  rtx wide_src = (REG_P (src)
		  ? gen_rtx_REG (<MODE>mode, REGNO (src))
		  : gen_lowpart (<MODE>mode, src));

  emit_move_insn (operands[0], wide_src);
  DONE;
})
10372
;; Standard vec_init expander for the 256-bit modes: hands the target
;; (operand 0) and the initializer (operand 1) to
;; ix86_expand_vector_init, passing false as its first argument.
(define_expand "vec_init<mode>"
  [(match_operand:V_256 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_AVX"
{
  rtx target = operands[0];
  rtx vals = operands[1];

  ix86_expand_vector_init (false, target, vals);
  DONE;
})
10381
;; Build a 256-bit vector from two half-width vectors.
;; Alternative 0: operand 2 is a register or memory -> vinsertf128 places
;; it in the high half of operand 1 viewed as a 256-bit register (%t1).
;; Alternative 1: operand 2 matches constraint "C" -> a 128-bit move of
;; operand 1 into the low part of the destination (%x0); the move
;; mnemonic is picked from the insn's mode attribute.
(define_insn "*vec_concat<mode>_avx"
  [(set (match_operand:V_256 0 "register_operand" "=x,x")
	(vec_concat:V_256
	  (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
	  (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
  "TARGET_AVX"
{
  if (which_alternative == 0)
    return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";

  if (which_alternative != 1)
    gcc_unreachable ();

  switch (get_attr_mode (insn))
    {
    case MODE_V8SF:
      return "vmovaps\t{%1, %x0|%x0, %1}";
    case MODE_V4DF:
      return "vmovapd\t{%1, %x0|%x0, %1}";
    default:
      return "vmovdqa\t{%1, %x0|%x0, %1}";
    }
}
  [(set_attr "type" "sselog,ssemov")
   (set_attr "prefix_extra" "1,*")
   (set_attr "length_immediate" "1,*")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
10412
;; F16C half-precision to single-precision conversion, register source,
;; V4SF result.  The conversion is modelled as an opaque V8SF unspec of the
;; V8HI operand 1, of which the low four lanes are selected.
;; The selector lists lanes 0, 1, 2, 3 in order so that the RTL describes
;; the low half of the converted vector; a selector that repeats a lane
;; (such as 0, 1, 1, 2) would describe a different value than the one the
;; instruction produces.
10413 (define_insn "vcvtph2ps"
10414 [(set (match_operand:V4SF 0 "register_operand" "=x")
10415 (vec_select:V4SF
10416 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
10417 UNSPEC_VCVTPH2PS)
10418 (parallel [(const_int 0) (const_int 1)
10419 (const_int 2) (const_int 3)])))]
10420 "TARGET_F16C"
10421 "vcvtph2ps\t{%1, %0|%0, %1}"
10422 [(set_attr "type" "ssecvt")
10423 (set_attr "prefix" "vex")
10424 (set_attr "mode" "V4SF")])
10425
;; Memory-source form of the half-to-single conversion: a V4HI memory
;; operand is converted directly to a V4SF register result, so no
;; vec_select wrapper is needed.
;; NOTE(review): the "mode" attribute is V8SF although the result is V4SF
;; (the register-source "vcvtph2ps" pattern uses V4SF) -- confirm whether
;; this is intentional.
10426 (define_insn "*vcvtph2ps_load"
10427 [(set (match_operand:V4SF 0 "register_operand" "=x")
10428 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
10429 UNSPEC_VCVTPH2PS))]
10430 "TARGET_F16C"
10431 "vcvtph2ps\t{%1, %0|%0, %1}"
10432 [(set_attr "type" "ssecvt")
10433 (set_attr "prefix" "vex")
10434 (set_attr "mode" "V8SF")])
10435
;; 256-bit half-to-single conversion: all eight HImode elements of operand 1
;; (register or memory) are converted to a V8SF register result.
10436 (define_insn "vcvtph2ps256"
10437 [(set (match_operand:V8SF 0 "register_operand" "=x")
10438 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
10439 UNSPEC_VCVTPH2PS))]
10440 "TARGET_F16C"
10441 "vcvtph2ps\t{%1, %0|%0, %1}"
10442 [(set_attr "type" "ssecvt")
10443 (set_attr "prefix" "vex")
10444 (set_attr "mode" "V8SF")])
10445
;; Expander for the 128-bit single-to-half conversion.  The V8HI result is
;; described as the V4HI conversion of operand 1 (with immediate operand 2)
;; concatenated with operand 3.  Operand 3 is not supplied by the caller:
;; the preparation statement sets it to the V4HImode zero constant.
;; NOTE(review): operand 2 is presumably the rounding-control immediate --
;; confirm against the vcvtps2ph instruction reference.
10446 (define_expand "vcvtps2ph"
10447 [(set (match_operand:V8HI 0 "register_operand" "")
10448 (vec_concat:V8HI
10449 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
10450 (match_operand:SI 2 "immediate_operand" "")]
10451 UNSPEC_VCVTPS2PH)
10452 (match_dup 3)))]
10453 "TARGET_F16C"
10454 "operands[3] = CONST0_RTX (V4HImode);")
10455
;; Register-destination single-to-half conversion.  Matches the RTL produced
;; by the "vcvtps2ph" expander: the V4HI conversion result concatenated with
;; a constant-zero V4HI (operand 3, which does not appear in the assembly).
;; NOTE(review): constraint "N" on operand 2 is presumably the i386
;; unsigned 8-bit immediate constraint -- confirm in constraints.md.
10456 (define_insn "*vcvtps2ph"
10457 [(set (match_operand:V8HI 0 "register_operand" "=x")
10458 (vec_concat:V8HI
10459 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
10460 (match_operand:SI 2 "immediate_operand" "N")]
10461 UNSPEC_VCVTPS2PH)
10462 (match_operand:V4HI 3 "const0_operand" "")))]
10463 "TARGET_F16C"
10464 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
10465 [(set_attr "type" "ssecvt")
10466 (set_attr "prefix" "vex")
10467 (set_attr "mode" "V4SF")])
10468
;; Memory-destination single-to-half conversion: the V4HI conversion of
;; register operand 1 (with immediate operand 2) is stored directly to a
;; V4HI memory operand, so no zero-extension to V8HI is described.
10469 (define_insn "*vcvtps2ph_store"
10470 [(set (match_operand:V4HI 0 "memory_operand" "=m")
10471 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
10472 (match_operand:SI 2 "immediate_operand" "N")]
10473 UNSPEC_VCVTPS2PH))]
10474 "TARGET_F16C"
10475 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
10476 [(set_attr "type" "ssecvt")
10477 (set_attr "prefix" "vex")
10478 (set_attr "mode" "V4SF")])
10479
;; 256-bit single-to-half conversion: V8SF register operand 1 (with
;; immediate operand 2) is converted to a V8HI result that may live in a
;; register or in memory.
10480 (define_insn "vcvtps2ph256"
10481 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
10482 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
10483 (match_operand:SI 2 "immediate_operand" "N")]
10484 UNSPEC_VCVTPS2PH))]
10485 "TARGET_F16C"
10486 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
10487 [(set_attr "type" "ssecvt")
10488 (set_attr "prefix" "vex")
10489 (set_attr "mode" "V8SF")])