;; VSX patterns.
;; Copyright (C) 2009-2016 Free Software Foundation, Inc.
;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])

;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])

;; Iterator for the 2 64-bit vector types + 128-bit types that are loaded with
;; lxvd2x to properly handle swapping words on little endian
(define_mode_iterator VSX_LE [V2DF V2DI V1TI])

;; Mode iterator to handle swapping words on little endian for the 128-bit
;; types that go in a single vector register.
(define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
                                  (TF "FLOAT128_VECTOR_P (TFmode)")
                                  (TI "TARGET_VSX_TIMODE")])

;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])

;; Iterator for the DF types
(define_mode_iterator VSX_DF [V2DF DF])

;; Iterator for vector floating point types supported by VSX
(define_mode_iterator VSX_F [V4SF V2DF])

;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")])

;; Iterator for memory move.  Handle TImode specially to allow
;; it to use gprs as well as vsx registers.
(define_mode_iterator VSX_M [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")])

(define_mode_iterator VSX_M2 [V16QI
                              V8HI
                              V4SI
                              V2DI
                              V4SF
                              V2DF
                              V1TI
                              (KF "FLOAT128_VECTOR_P (KFmode)")
                              (TF "FLOAT128_VECTOR_P (TFmode)")
                              (TI "TARGET_VSX_TIMODE")])

;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm  [(V16QI "vw4")
                        (V8HI  "vw4")
                        (V4SI  "vw4")
                        (V4SF  "vw4")
                        (V2DF  "vd2")
                        (V2DI  "vd2")
                        (DF    "d")
                        (TF    "vd2")
                        (KF    "vd2")
                        (V1TI  "vd2")
                        (TI    "vd2")])

;; Map into the appropriate suffix based on the type
(define_mode_attr VSs  [(V16QI "sp")
                        (V8HI  "sp")
                        (V4SI  "sp")
                        (V4SF  "sp")
                        (V2DF  "dp")
                        (V2DI  "dp")
                        (DF    "dp")
                        (SF    "sp")
                        (TF    "dp")
                        (KF    "dp")
                        (V1TI  "dp")
                        (TI    "dp")])
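
;; For example, "xvadd<VSs>" in a template below becomes "xvaddsp" when the
;; iterator mode is V4SF and "xvadddp" when it is V2DF.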

;; Map the register class used
(define_mode_attr VSr  [(V16QI "v")
                        (V8HI  "v")
                        (V4SI  "v")
                        (V4SF  "wf")
                        (V2DI  "wd")
                        (V2DF  "wd")
                        (DI    "wi")
                        (DF    "ws")
                        (SF    "ww")
                        (TF    "wp")
                        (KF    "wq")
                        (V1TI  "v")
                        (TI    "wt")])
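
;; Roughly, "v" is an Altivec register, "wa" is any VSX register, and the
;; remaining "w" classes prefer the VSX registers best suited to the mode;
;; see constraints.md for the exact definitions.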

;; Map the register class used for float<->int conversions (floating point side)
;; VSr2 is the preferred register class, VSr3 is any register class that will
;; hold the data
(define_mode_attr VSr2 [(V2DF "wd")
                        (V4SF "wf")
                        (DF   "ws")
                        (SF   "ww")
                        (DI   "wi")])

(define_mode_attr VSr3 [(V2DF "wa")
                        (V4SF "wa")
                        (DF   "ws")
                        (SF   "ww")
                        (DI   "wi")])

;; Map the register class for sp<->dp float conversions, destination
(define_mode_attr VSr4 [(SF   "ws")
                        (DF   "f")
                        (V2DF "wd")
                        (V4SF "v")])

;; Map the register class for sp<->dp float conversions, source
(define_mode_attr VSr5 [(SF   "ws")
                        (DF   "f")
                        (V2DF "v")
                        (V4SF "wd")])

;; The VSX register class that a type can occupy, even if it is not the
;; preferred register class (VSr is the preferred register class that will get
;; allocated first).
(define_mode_attr VSa  [(V16QI "wa")
                        (V8HI  "wa")
                        (V4SI  "wa")
                        (V4SF  "wa")
                        (V2DI  "wa")
                        (V2DF  "wa")
                        (DI    "wi")
                        (DF    "ws")
                        (SF    "ww")
                        (V1TI  "wa")
                        (TI    "wt")
                        (TF    "wp")
                        (KF    "wq")])

;; Same size integer type for floating point data
(define_mode_attr VSi [(V4SF "v4si")
                       (V2DF "v2di")
                       (DF   "di")])

(define_mode_attr VSI [(V4SF "V4SI")
                       (V2DF "V2DI")
                       (DF   "DI")])

;; Word size for same size conversion
(define_mode_attr VSc [(V4SF "w")
                       (V2DF "d")
                       (DF   "d")])

;; Map into either s or v, depending on whether this is a scalar or vector
;; operation
(define_mode_attr VSv [(V16QI "v")
                       (V8HI  "v")
                       (V4SI  "v")
                       (V4SF  "v")
                       (V2DI  "v")
                       (V2DF  "v")
                       (V1TI  "v")
                       (DF    "s")
                       (KF    "v")])

;; Appropriate type for add ops (and other simple FP ops)
(define_mode_attr VStype_simple [(V2DF "vecdouble")
                                 (V4SF "vecfloat")
                                 (DF   "fp")])

(define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
                                   (V4SF "fp_addsub_s")
                                   (DF   "fp_addsub_d")])

;; Appropriate type for multiply ops
(define_mode_attr VStype_mul [(V2DF "vecdouble")
                              (V4SF "vecfloat")
                              (DF   "dmul")])

(define_mode_attr VSfptype_mul [(V2DF "fp_mul_d")
                                (V4SF "fp_mul_s")
                                (DF   "fp_mul_d")])

;; Appropriate type for divide ops.
(define_mode_attr VStype_div [(V2DF "vecdiv")
                              (V4SF "vecfdiv")
                              (DF   "ddiv")])

(define_mode_attr VSfptype_div [(V2DF "fp_div_d")
                                (V4SF "fp_div_s")
                                (DF   "fp_div_d")])

;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
;; the scalar sqrt
(define_mode_attr VStype_sqrt [(V2DF "dsqrt")
                               (V4SF "ssqrt")
                               (DF   "dsqrt")])

(define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
                                 (V4SF "fp_sqrt_s")
                                 (DF   "fp_sqrt_d")])

;; Iterator and modes for sp<->dp conversions
;; Because scalar SF values are represented internally as double, use the
;; V4SF type to represent this rather than SF.
(define_mode_iterator VSX_SPDP [DF V4SF V2DF])

(define_mode_attr VS_spdp_res [(DF   "V4SF")
                               (V4SF "V2DF")
                               (V2DF "V4SF")])

(define_mode_attr VS_spdp_insn [(DF   "xscvdpsp")
                                (V4SF "xvcvspdp")
                                (V2DF "xvcvdpsp")])

(define_mode_attr VS_spdp_type [(DF   "fp")
                                (V4SF "vecdouble")
                                (V2DF "vecdouble")])

;; Map the scalar mode for a vector type
(define_mode_attr VS_scalar [(V1TI  "TI")
                             (V2DF  "DF")
                             (V2DI  "DI")
                             (V4SF  "SF")
                             (V4SI  "SI")
                             (V8HI  "HI")
                             (V16QI "QI")])

;; Map to a double-sized vector mode
(define_mode_attr VS_double [(V4SI "V8SI")
                             (V4SF "V8SF")
                             (V2DI "V4DI")
                             (V2DF "V4DF")
                             (V1TI "V2TI")])

;; Map register class for 64-bit element in 128-bit vector for direct moves
;; to/from gprs
(define_mode_attr VS_64dm [(V2DF "wk")
                           (V2DI "wj")])

;; Map register class for 64-bit element in 128-bit vector for normal register
;; to register moves
(define_mode_attr VS_64reg [(V2DF "ws")
                            (V2DI "wi")])

;; Constants for creating unspecs
(define_c_enum "unspec"
  [UNSPEC_VSX_CONCAT
   UNSPEC_VSX_CVDPSXWS
   UNSPEC_VSX_CVDPUXWS
   UNSPEC_VSX_CVSPDP
   UNSPEC_VSX_CVSPDPN
   UNSPEC_VSX_CVDPSPN
   UNSPEC_VSX_CVSXWDP
   UNSPEC_VSX_CVUXWDP
   UNSPEC_VSX_CVSXDSP
   UNSPEC_VSX_CVUXDSP
   UNSPEC_VSX_CVSPSXDS
   UNSPEC_VSX_CVSPUXDS
   UNSPEC_VSX_TDIV
   UNSPEC_VSX_TSQRT
   UNSPEC_VSX_SET
   UNSPEC_VSX_ROUND_I
   UNSPEC_VSX_ROUND_IC
   UNSPEC_VSX_SLDWI
   UNSPEC_VSX_XXSPLTW
   UNSPEC_VSX_XXSPLTD
   UNSPEC_VSX_DIVSD
   UNSPEC_VSX_DIVUD
   UNSPEC_VSX_MULSD
   UNSPEC_VSX_XVCVSXDDP
   UNSPEC_VSX_XVCVUXDDP
   UNSPEC_VSX_XVCVDPSXDS
   UNSPEC_VSX_XVCVDPUXDS
  ])

;; VSX (P9) moves

(define_insn "*p9_vecload_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_M 1 "memory_operand" "Z"))]
  "TARGET_P9_VECTOR"
  "lxvx %x0,%y1"
  [(set_attr "type" "vecload")
   (set_attr "length" "4")])

(define_insn "*p9_vecstore_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand" "=Z")
        (match_operand:VSX_M 1 "vsx_register_operand" "<VSa>"))]
  "TARGET_P9_VECTOR"
  "stxvx %x1,%y0"
  [(set_attr "type" "vecstore")
   (set_attr "length" "4")])

;; VSX moves

;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
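;; For example, a little-endian V2DF load becomes an lxvd2x, which fetches
;; the two doublewords in big-endian element order, followed by an xxpermdi
;; that swaps them into little-endian element order.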
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_LE 1 "memory_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
  "
{
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  "
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_W 1 "memory_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  "
{
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  "
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "memory_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "
{
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  "
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "memory_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "
{
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  "
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE 0 "memory_operand" "=Z")
        (match_operand:VSX_LE 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_LE 0 "memory_operand" "")
        (match_operand:VSX_LE 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
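;; Since no scratch register is available after reload, the split swaps the
;; source in place, stores it, and swaps it back, leaving the register with
;; its original value.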
(define_split
  [(set (match_operand:VSX_LE 0 "memory_operand" "")
        (match_operand:VSX_LE 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))]
  "")

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
        (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_W 0 "memory_operand" "")
        (match_operand:VSX_W 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
{
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_W 0 "memory_operand" "")
        (match_operand:VSX_W 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  "")

(define_insn "*vsx_le_perm_store_v8hi"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V8HI 0 "memory_operand" "")
        (match_operand:V8HI 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V8HI 0 "memory_operand" "")
        (match_operand:V8HI 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "")

(define_insn "*vsx_le_perm_store_v16qi"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V16QI 0 "memory_operand" "")
        (match_operand:V16QI 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V16QI 0 "memory_operand" "")
        (match_operand:V16QI 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "")

;; Little endian word swapping for 128-bit types that are either scalars or
;; the special V1TI container class, for which it is not appropriate to use
;; vec_select.
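;; The swap is modelled as a 128-bit rotate by 64 bits, which is exactly
;; what "xxpermdi %x0,%x1,%x1,2" performs: with both inputs the same
;; register, it exchanges the two doublewords.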
(define_insn "*vsx_le_permute_<mode>"
  [(set (match_operand:VSX_LE_128 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
        (rotate:VSX_LE_128
         (match_operand:VSX_LE_128 1 "input_operand" "<VSa>,Z,<VSa>")
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "@
   xxpermdi %x0,%x1,%x1,2
   lxvd2x %x0,%y1
   stxvd2x %x1,%y0"
  [(set_attr "length" "4")
   (set_attr "type" "vecperm,vecload,vecstore")])

(define_insn_and_split "*vsx_le_undo_permute_<mode>"
  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,<VSa>")
        (rotate:VSX_LE_128
         (rotate:VSX_LE_128
          (match_operand:VSX_LE_128 1 "vsx_register_operand" "0,<VSa>")
          (const_int 64))
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "@
   #
   xxlor %x0,%x1,%x1"
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
    {
      emit_note (NOTE_INSN_DELETED);
      DONE;
    }
}
  [(set_attr "length" "0,4")
   (set_attr "type" "vecsimple")])

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_LE_128 1 "memory_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "#"
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  [(set (match_dup 2)
        (rotate:VSX_LE_128 (match_dup 1)
                           (const_int 64)))
   (set (match_dup 0)
        (rotate:VSX_LE_128 (match_dup 2)
                           (const_int 64)))]
  "
{
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  "
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
  [(set (match_dup 2)
        (rotate:VSX_LE_128 (match_dup 1)
                           (const_int 64)))
   (set (match_dup 0)
        (rotate:VSX_LE_128 (match_dup 2)
                           (const_int 64)))]
{
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
})

;; Peephole to catch memory to memory transfers for TImode if TImode landed in
;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
;; floating point are handled by the more generic swap elimination pass.
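;; Two successive 64-bit rotates of a 128-bit value are the identity, so
;; when the intermediate register is dead the pair reduces to a simple
;; register copy.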
(define_peephole2
  [(set (match_operand:TI 0 "vsx_register_operand" "")
        (rotate:TI (match_operand:TI 1 "vsx_register_operand" "")
                   (const_int 64)))
   (set (match_operand:TI 2 "vsx_register_operand" "")
        (rotate:TI (match_dup 0)
                   (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && TARGET_VSX_TIMODE
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
  [(set (match_dup 1)
        (rotate:VSX_LE_128 (match_dup 1)
                           (const_int 64)))
   (set (match_dup 0)
        (rotate:VSX_LE_128 (match_dup 1)
                           (const_int 64)))
   (set (match_dup 1)
        (rotate:VSX_LE_128 (match_dup 1)
                           (const_int 64)))]
  "")

(define_insn "*vsx_mov<mode>"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?<VSa>,?<VSa>,r,we,wQ,?&r,??Y,??r,??r,<VSr>,?<VSa>,*r,v,wZ,v")
        (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,<VSa>,Z,<VSa>,we,b,r,wQ,r,Y,r,j,j,j,W,v,wZ"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,mffgpr,mftgpr,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload")
   (set_attr "length" "4,4,4,4,4,4,8,4,12,12,12,12,16,4,4,*,16,4,4")])

;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal
;; use of TImode is for unions.  However for plain data movement, slightly
;; favor the vector loads
(define_insn "*vsx_movti_64bit"
  [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,r,we,v,v,wZ,wQ,&r,Y,r,r,?r")
        (match_operand:TI 1 "input_operand" "wa,Z,wa,O,we,b,W,wZ,v,r,wQ,r,Y,r,n"))]
  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
   && (register_operand (operands[0], TImode)
       || register_operand (operands[1], TImode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,mffgpr,mftgpr,vecsimple,vecstore,vecload,store,load,store,load,*,*")
   (set_attr "length" "4,4,4,4,8,4,16,4,4,8,8,8,8,8,8")])

(define_insn "*vsx_movti_32bit"
  [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r")
        (match_operand:TI 1 "input_operand" "wa, Z,wa, O,W,wZ, v,r,r,    Q,    Y,    r,n"))]
  "! TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
   && (register_operand (operands[0], TImode)
       || register_operand (operands[1], TImode))"
{
  switch (which_alternative)
    {
    case 0:
      return "stxvd2x %x1,%y0";

    case 1:
      return "lxvd2x %x0,%y1";

    case 2:
      return "xxlor %x0,%x1,%x1";

    case 3:
      return "xxlxor %x0,%x0,%x0";

    case 4:
      return output_vec_const_move (operands);

    case 5:
      return "stvx %1,%y0";

    case 6:
      return "lvx %0,%y1";

    case 7:
      if (TARGET_STRING)
        return "stswi %1,%P0,16";

    case 8:
      return "#";

    case 9:
      /* If the address is not used in the output, we can use lswi.
         Otherwise, fall through to generating four loads.  */
      if (TARGET_STRING
          && ! reg_overlap_mentioned_p (operands[0], operands[1]))
        return "lswi %0,%P1,16";
      /* ... fall through ...  */

    case 10:
    case 11:
    case 12:
      return "#";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,store,load,load, *, *")
   (set_attr "update" " *, *, *, *, *, *, *, yes, yes, yes, yes, *, *")
   (set_attr "length" " 4, 4, 4, 4, 8, 4, 4, 16, 16, 16, 16,16,16")
   (set (attr "cell_micro") (if_then_else (match_test "TARGET_STRING")
                                          (const_string "always")
                                          (const_string "conditional")))])

;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand" "")
        (match_operand:VSX_M 1 "memory_operand" ""))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "")

(define_expand "vsx_store_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand" "")
        (match_operand:VSX_M 1 "vsx_register_operand" ""))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "")

\f
;; VSX vector floating point arithmetic instructions.  The VSX scalar
;; instructions are now combined with the insn for the traditional floating
;; point unit.
(define_insn "*vsx_add<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvadd<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_sub<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsub<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_mul<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmul<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_mul>")])

; Emulate vector with scalar for vec_mul in V2DImode
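; No vector doubleword multiply instruction is available here, so the split
; extracts both elements, multiplies each pair with the scalar muldi3
; pattern, and concatenates the two products back into a vector.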
(define_insn_and_split "vsx_mul_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_MULSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
  [(const_int 0)]
  "
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  emit_insn (gen_muldi3 (op5, op3, op4));
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  emit_insn (gen_muldi3 (op3, op3, op4));
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}"
  [(set_attr "type" "mul")])

(define_insn "*vsx_div<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvdiv<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_div>")
   (set_attr "fp_type" "<VSfptype_div>")])

; Emulate vector with scalar for vec_div in V2DImode
(define_insn_and_split "vsx_div_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_DIVSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
  [(const_int 0)]
  "
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  emit_insn (gen_divdi3 (op5, op3, op4));
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  emit_insn (gen_divdi3 (op3, op3, op4));
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}"
  [(set_attr "type" "div")])

(define_insn_and_split "vsx_udiv_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_DIVUD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
  [(const_int 0)]
  "
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  emit_insn (gen_udivdi3 (op5, op3, op4));
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  emit_insn (gen_udivdi3 (op3, op3, op4));
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}"
  [(set_attr "type" "div")])

;; *tdiv* instruction returning the FG flag
(define_expand "vsx_tdiv<mode>3_fg"
  [(set (match_dup 3)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
                      (match_operand:VSX_B 2 "vsx_register_operand" "")]
                     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand" "")
        (gt:SI (match_dup 3)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

;; *tdiv* instruction returning the FE flag
(define_expand "vsx_tdiv<mode>3_fe"
  [(set (match_dup 3)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
                      (match_operand:VSX_B 2 "vsx_register_operand" "")]
                     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand" "")
        (eq:SI (match_dup 3)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

(define_insn "*vsx_tdiv<mode>3_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
                      (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
                     UNSPEC_VSX_TDIV))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tdiv<VSs> %0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_fre<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_FRES))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvre<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_neg<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvneg<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_abs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvabs<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_nabs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (neg:VSX_F
         (abs:VSX_F
          (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvnabs<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_smax<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmax<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_smin<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmin<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_sqrt<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsqrt<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_sqrt>")
   (set_attr "fp_type" "<VSfptype_sqrt>")])

(define_insn "*vsx_rsqrte<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_RSQRT))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvrsqrte<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

;; *tsqrt* returning the fg flag
(define_expand "vsx_tsqrt<mode>2_fg"
  [(set (match_dup 3)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
                     UNSPEC_VSX_TSQRT))
   (set (match_operand:SI 0 "gpc_reg_operand" "")
        (gt:SI (match_dup 3)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

;; *tsqrt* returning the fe flag
(define_expand "vsx_tsqrt<mode>2_fe"
  [(set (match_dup 3)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
                     UNSPEC_VSX_TSQRT))
   (set (match_operand:SI 0 "gpc_reg_operand" "")
        (eq:SI (match_dup 3)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

(define_insn "*vsx_tsqrt<mode>2_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
                     UNSPEC_VSX_TSQRT))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tsqrt<VSs> %0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

;; Fused vector multiply/add instructions.  Support the classical Altivec
;; versions of fma, which allow the target to be a separate register from
;; the 3 inputs.  Under VSX, the target must be either the addend or the
;; first multiplicand.
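;; In the "a" forms (e.g. xvmaddadp x,a,b) the target overlaps the addend,
;; computing x = a*b + x; in the "m" forms (e.g. xvmaddmdp x,a,b) it
;; overlaps a multiplicand, computing x = a*x + b.  The matched "0"
;; constraints below pick whichever form suits the register tie.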

(define_insn "*vsx_fmav4sf4"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
        (fma:V4SF
          (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
          (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
          (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "@
   xvmaddasp %x0,%x1,%x2
   xvmaddmsp %x0,%x1,%x3
   xvmaddasp %x0,%x1,%x2
   xvmaddmsp %x0,%x1,%x3
   vmaddfp %0,%1,%2,%3"
  [(set_attr "type" "vecfloat")])

(define_insn "*vsx_fmav2df4"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
        (fma:V2DF
          (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
          (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
          (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "@
   xvmaddadp %x0,%x1,%x2
   xvmaddmdp %x0,%x1,%x3
   xvmaddadp %x0,%x1,%x2
   xvmaddmdp %x0,%x1,%x3"
  [(set_attr "type" "vecdouble")])

(define_insn "*vsx_fms<mode>4"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
        (fma:VSX_F
          (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
          (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
          (neg:VSX_F
            (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "@
   xvmsuba<VSs> %x0,%x1,%x2
   xvmsubm<VSs> %x0,%x1,%x3
   xvmsuba<VSs> %x0,%x1,%x2
   xvmsubm<VSs> %x0,%x1,%x3"
  [(set_attr "type" "<VStype_mul>")])

(define_insn "*vsx_nfma<mode>4"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
        (neg:VSX_F
         (fma:VSX_F
          (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
          (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
          (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "@
   xvnmadda<VSs> %x0,%x1,%x2
   xvnmaddm<VSs> %x0,%x1,%x3
   xvnmadda<VSs> %x0,%x1,%x2
   xvnmaddm<VSs> %x0,%x1,%x3"
  [(set_attr "type" "<VStype_mul>")
   (set_attr "fp_type" "<VSfptype_mul>")])

(define_insn "*vsx_nfmsv4sf4"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
        (neg:V4SF
         (fma:V4SF
           (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
           (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
           (neg:V4SF
             (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "@
   xvnmsubasp %x0,%x1,%x2
   xvnmsubmsp %x0,%x1,%x3
   xvnmsubasp %x0,%x1,%x2
   xvnmsubmsp %x0,%x1,%x3
   vnmsubfp %0,%1,%2,%3"
  [(set_attr "type" "vecfloat")])

(define_insn "*vsx_nfmsv2df4"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
        (neg:V2DF
         (fma:V2DF
           (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
           (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
           (neg:V2DF
             (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "@
   xvnmsubadp %x0,%x1,%x2
   xvnmsubmdp %x0,%x1,%x3
   xvnmsubadp %x0,%x1,%x2
   xvnmsubmdp %x0,%x1,%x3"
  [(set_attr "type" "vecdouble")])

;; Vector conditional expressions (no scalar version for these instructions)
(define_insn "vsx_eq<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_gt<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_ge<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

;; Compare vectors producing a vector result and a predicate, setting CR6 to
;; indicate a combined status
(define_insn "*vsx_eq_<mode>_p"
  [(set (reg:CC 74)
        (unspec:CC
         [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
                 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
         UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (eq:VSX_F (match_dup 1)
                  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<VSs>. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_gt_<mode>_p"
  [(set (reg:CC 74)
        (unspec:CC
         [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
                 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
         UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (gt:VSX_F (match_dup 1)
                  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<VSs>. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_ge_<mode>_p"
  [(set (reg:CC 74)
        (unspec:CC
         [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
                 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
         UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (ge:VSX_F (match_dup 1)
                  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<VSs>. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

;; Vector select
(define_insn "*vsx_xxsel<mode>"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (if_then_else:VSX_L
         (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
                (match_operand:VSX_L 4 "zero_constant" ""))
         (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
         (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsel %x0,%x3,%x2,%x1"
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_xxsel<mode>_uns"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (if_then_else:VSX_L
         (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
                   (match_operand:VSX_L 4 "zero_constant" ""))
         (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
         (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsel %x0,%x3,%x2,%x1"
  [(set_attr "type" "vecperm")])

;; Copy sign
(define_insn "vsx_copysign<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F
         [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
          (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
         UNSPEC_COPYSIGN))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcpsgn<VSs> %x0,%x2,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

;; For the conversions, limit the register class for the integer value to be
;; the fprs because we don't want to add the altivec registers to movdi/movsi.
;; For the unsigned tests, there isn't a generic double -> unsigned conversion
;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
;; in allowing virtual registers.
(define_insn "vsx_float<VSi><mode>2"
  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
        (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcvsx<VSc><VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_floatuns<VSi><mode>2"
  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
        (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcvux<VSc><VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_fix_trunc<mode><VSi>2"
  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
        (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_fixuns_trunc<mode><VSi>2"
  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
        (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

;; Math rounding functions
(define_insn "vsx_x<VSv>r<VSs>i"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_VSX_ROUND_I))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<VSs>i %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_x<VSv>r<VSs>ic"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_VSX_ROUND_IC))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<VSs>ic %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_btrunc<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<VSs>iz %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_b2trunc<mode>2"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_FRIZ))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<VSs>iz %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_floor<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_FRIM))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<VSs>im %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_ceil<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_FRIP))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<VSs>ip %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

\f
;; VSX convert to/from double vector

;; Convert between single and double precision
;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
;; scalar single precision instructions internally use the double format.
;; Prefer the altivec registers, since we likely will need to do a vperm
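;; For example, with <MODE> = V4SF the insn below emits xvcvspdp (vector
;; single -> double); with V2DF it emits xvcvdpsp; and with DF it emits the
;; scalar xscvdpsp, as selected by the VS_spdp_insn attribute above.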
(define_insn "vsx_<VS_spdp_insn>"
  [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
        (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
                              UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "<VS_spdp_insn> %x0,%x1"
  [(set_attr "type" "<VS_spdp_type>")])

;; xscvspdp, represent the scalar SF type as V4SF
(define_insn "vsx_xscvspdp"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
        (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvspdp %x0,%x1"
  [(set_attr "type" "fp")])

;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
;; format of scalars is actually DF.
(define_insn "vsx_xscvdpsp_scalar"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
                     UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvdpsp %x0,%x1"
  [(set_attr "type" "fp")])

;; Same as vsx_xscvspdp, but use SF as the type
(define_insn "vsx_xscvspdp_scalar2"
  [(set (match_operand:SF 0 "vsx_register_operand" "=f")
        (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvspdp %x0,%x1"
  [(set_attr "type" "fp")])

;; ISA 2.07 xscvdpspn/xscvspdpn, which do not raise an error on signalling NaNs
(define_insn "vsx_xscvdpspn"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww,?ww")
        (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")]
                     UNSPEC_VSX_CVDPSPN))]
  "TARGET_XSCVDPSPN"
  "xscvdpspn %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xscvspdpn"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?ws")
        (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wf,wa")]
                   UNSPEC_VSX_CVSPDPN))]
  "TARGET_XSCVSPDPN"
  "xscvspdpn %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xscvdpspn_scalar"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,?wa")
        (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww,ww")]
                     UNSPEC_VSX_CVDPSPN))]
  "TARGET_XSCVDPSPN"
  "xscvdpspn %x0,%x1"
  [(set_attr "type" "fp")])

;; Used by direct move to move a SFmode value from GPR to VSX register
(define_insn "vsx_xscvspdpn_directmove"
  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
        (unspec:SF [(match_operand:DI 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDPN))]
  "TARGET_XSCVSPDPN"
  "xscvspdpn %x0,%x1"
  [(set_attr "type" "fp")])

;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
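;; For example, vec_ctf (v, 3) on a V2DI vector expands to xvcvsxddp
;; followed by a multiply by 2^-3 (via rs6000_scale_v2df), while vec_cts
;; and vec_ctu multiply by 2^scale before converting to integers.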

(define_expand "vsx_xvcvsxddp_scale"
  [(match_operand:V2DF 0 "vsx_register_operand" "")
   (match_operand:V2DI 1 "vsx_register_operand" "")
   (match_operand:QI 2 "immediate_operand" "")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
  emit_insn (gen_vsx_xvcvsxddp (op0, op1));
  if (scale != 0)
    rs6000_scale_v2df (op0, op0, -scale);
  DONE;
})

(define_insn "vsx_xvcvsxddp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVSXDDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxddp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvuxddp_scale"
  [(match_operand:V2DF 0 "vsx_register_operand" "")
   (match_operand:V2DI 1 "vsx_register_operand" "")
   (match_operand:QI 2 "immediate_operand" "")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
  emit_insn (gen_vsx_xvcvuxddp (op0, op1));
  if (scale != 0)
    rs6000_scale_v2df (op0, op0, -scale);
  DONE;
})

(define_insn "vsx_xvcvuxddp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVUXDDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxddp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvdpsxds_scale"
  [(match_operand:V2DI 0 "vsx_register_operand" "")
   (match_operand:V2DF 1 "vsx_register_operand" "")
   (match_operand:QI 2 "immediate_operand" "")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp;
  int scale = INTVAL (operands[2]);
  /* With a scale of zero, convert the input directly; otherwise scale
     into a temporary first so the input is not clobbered.  */
  if (scale == 0)
    tmp = op1;
  else
    {
      tmp = gen_reg_rtx (V2DFmode);
      rs6000_scale_v2df (tmp, op1, scale);
    }
  emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
  DONE;
})

(define_insn "vsx_xvcvdpsxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVDPSXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvdpuxds_scale"
  [(match_operand:V2DI 0 "vsx_register_operand" "")
   (match_operand:V2DF 1 "vsx_register_operand" "")
   (match_operand:QI 2 "immediate_operand" "")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp;
  int scale = INTVAL (operands[2]);
  /* With a scale of zero, convert the input directly; otherwise scale
     into a temporary first so the input is not clobbered.  */
  if (scale == 0)
    tmp = op1;
  else
    {
      tmp = gen_reg_rtx (V2DFmode);
      rs6000_scale_v2df (tmp, op1, scale);
    }
  emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
  DONE;
})

(define_insn "vsx_xvcvdpuxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVDPUXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpuxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Convert from 64-bit to 32-bit types
;; Note: favor the Altivec registers, since the usual use of these
;; instructions is in vector converts and we need to use the Altivec vperm
;; instruction.

(define_insn "vsx_xvcvdpsxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
                     UNSPEC_VSX_CVDPSXWS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsxws %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvdpuxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
                     UNSPEC_VSX_CVDPUXWS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpuxws %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsxdsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
                     UNSPEC_VSX_CVSXDSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxdsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvuxdsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
                     UNSPEC_VSX_CVUXDSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxdsp %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Convert from 32-bit to 64-bit types
(define_insn "vsx_xvcvsxwdp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
                     UNSPEC_VSX_CVSXWDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvuxwdp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
                     UNSPEC_VSX_CVUXWDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvspsxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
                     UNSPEC_VSX_CVSPSXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvspsxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvspuxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
                     UNSPEC_VSX_CVSPUXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvspuxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
;; the xvrdpiz instruction does not truncate the value if the floating
;; point value is < LONG_MIN or > LONG_MAX.
(define_insn "*vsx_float_fix_v2df2"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
        (float:V2DF
         (fix:V2DI
          (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
   && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
   && !flag_trapping_math && TARGET_FRIZ"
  "xvrdpiz %x0,%x1"
  [(set_attr "type" "vecdouble")
   (set_attr "fp_type" "fp_addsub_d")])
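
;; Illustrative example (not from the original sources): with -ffast-math,
;; a per-element round trip such as
;;
;;   for (i = 0; i < 2; i++)
;;     y[i] = (double) (long long) x[i];
;;
;; can match this pattern and collapse to a single "xvrdpiz %x0,%x1", which
;; is only safe because fast math ignores inputs outside [LONG_MIN, LONG_MAX].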

\f
;; Permute operations

;; Build a V2DF/V2DI vector from two scalars
(define_insn "vsx_concat_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (vec_concat:VSX_D
         (match_operand:<VS_scalar> 1 "vsx_register_operand" "<VS_64reg>,<VSa>")
         (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,0";
  else
    return "xxpermdi %x0,%x2,%x1,0";
}
  [(set_attr "type" "vecperm")])
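
;; For illustration (hedged): on big endian, "xxpermdi %x0,%x1,%x2,0" takes
;; doubleword 0 of each input, so the result is { op1, op2 }; on little
;; endian the inputs are swapped so that vector element 0 (the low
;; doubleword) still ends up holding op1.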

;; Special purpose concat using xxpermdi to glue two single precision values
;; together, relying on the fact that internally scalar floats are represented
;; as doubles.  This is used to initialize a V4SF vector with 4 floats.
(define_insn "vsx_concat_v2sf"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V2DF
         [(match_operand:SF 1 "vsx_register_operand" "f,f")
          (match_operand:SF 2 "vsx_register_operand" "f,f")]
         UNSPEC_VSX_CONCAT))]
  "VECTOR_MEM_VSX_P (V2DFmode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,0";
  else
    return "xxpermdi %x0,%x2,%x1,0";
}
  [(set_attr "type" "vecperm")])

;; xxpermdi for little endian loads and stores.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_xxpermdi2_le_<mode>"
  [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
        (vec_select:VSX_LE
         (match_operand:VSX_LE 1 "vsx_register_operand" "<VSa>")
         (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_xxpermdi4_le_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
        (vec_select:VSX_W
         (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_xxpermdi8_le_V8HI"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
         (match_operand:V8HI 1 "vsx_register_operand" "wa")
         (parallel [(const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_xxpermdi16_le_V16QI"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
         (match_operand:V16QI 1 "vsx_register_operand" "wa")
         (parallel [(const_int 8) (const_int 9)
                    (const_int 10) (const_int 11)
                    (const_int 12) (const_int 13)
                    (const_int 14) (const_int 15)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)
                    (const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])
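
;; A hedged note on the patterns above: selector 2 in
;; "xxpermdi %x0,%x1,%x1,2" picks doubleword 1 of the first input and
;; doubleword 0 of the second, i.e. it swaps the two 64-bit halves of the
;; register, which is exactly the fixup needed to repair the doubleword
;; order produced by lxvd2x/stxvd2x on little endian.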

;; lxvd2x for little endian loads.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_lxvd2x2_le_<mode>"
  [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
        (vec_select:VSX_LE
         (match_operand:VSX_LE 1 "memory_operand" "Z")
         (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x4_le_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
        (vec_select:VSX_W
         (match_operand:VSX_W 1 "memory_operand" "Z")
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x8_le_V8HI"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
         (match_operand:V8HI 1 "memory_operand" "Z")
         (parallel [(const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x16_le_V16QI"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
         (match_operand:V16QI 1 "memory_operand" "Z")
         (parallel [(const_int 8) (const_int 9)
                    (const_int 10) (const_int 11)
                    (const_int 12) (const_int 13)
                    (const_int 14) (const_int 15)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)
                    (const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])
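
;; For illustration (hypothetical register numbers): a normal little endian
;; vector load is therefore emitted as the pair
;;
;;   lxvd2x 32,0,9          # load with the two doublewords swapped
;;   xxpermdi 32,32,32,2    # swap the halves back
;;
;; which together behave like a true little endian vector load.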

;; stxvd2x for little endian stores.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_stxvd2x2_le_<mode>"
  [(set (match_operand:VSX_LE 0 "memory_operand" "=Z")
        (vec_select:VSX_LE
         (match_operand:VSX_LE 1 "vsx_register_operand" "<VSa>")
         (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x4_le_<mode>"
  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
        (vec_select:VSX_W
         (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x8_le_V8HI"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
         (match_operand:V8HI 1 "vsx_register_operand" "wa")
         (parallel [(const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x16_le_V16QI"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (vec_select:V16QI
         (match_operand:V16QI 1 "vsx_register_operand" "wa")
         (parallel [(const_int 8) (const_int 9)
                    (const_int 10) (const_int 11)
                    (const_int 12) (const_int 13)
                    (const_int 14) (const_int 15)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)
                    (const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

;; Convert a TImode value into V1TImode
(define_expand "vsx_set_v1ti"
  [(match_operand:V1TI 0 "nonimmediate_operand" "")
   (match_operand:V1TI 1 "nonimmediate_operand" "")
   (match_operand:TI 2 "input_operand" "")
   (match_operand:QI 3 "u5bit_cint_operand" "")]
  "VECTOR_MEM_VSX_P (V1TImode)"
{
  if (operands[3] != const0_rtx)
    gcc_unreachable ();

  /* Setting the single element replaces the whole vector with the TI
     input, operands[2].  */
  emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
  DONE;
})

;; Set the element of a V2DF/V2DI mode
(define_insn "vsx_set_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?<VSa>")
        (unspec:VSX_D
         [(match_operand:VSX_D 1 "vsx_register_operand" "wd,<VSa>")
          (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")
          (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
         UNSPEC_VSX_SET))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  int idx_first = BYTES_BIG_ENDIAN ? 0 : 1;
  if (INTVAL (operands[3]) == idx_first)
    return "xxpermdi %x0,%x2,%x1,1";
  else if (INTVAL (operands[3]) == 1 - idx_first)
    return "xxpermdi %x0,%x1,%x2,0";
  else
    gcc_unreachable ();
}
  [(set_attr "type" "vecperm")])
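
;; Worked example (hedged): setting element 0 of a V2DF on big endian takes
;; the idx_first branch above and emits
;;
;;   xxpermdi %x0,%x2,%x1,1   # result = { new scalar, old element 1 }
;;
;; on little endian the element numbering is reversed, so the same source
;; operation takes the other branch instead.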

;; Extract a DF/DI element from V2DF/V2DI
(define_expand "vsx_extract_<mode>"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "")
        (vec_select:<VS_scalar> (match_operand:VSX_D 1 "register_operand" "")
                                (parallel
                                 [(match_operand:QI 2 "u5bit_cint_operand" "")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "")

;; Optimize cases where we can do a simple or direct move, or see if we can
;; avoid doing the move at all.
(define_insn "*vsx_extract_<mode>_internal1"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "=d,<VS_64reg>,r,r")
        (vec_select:<VS_scalar>
         (match_operand:VSX_D 1 "register_operand" "d,<VS_64reg>,<VS_64dm>,<VS_64dm>")
         (parallel
          [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD,wL")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
{
  int op0_regno = REGNO (operands[0]);
  int op1_regno = REGNO (operands[1]);

  if (op0_regno == op1_regno)
    return "nop";

  if (INT_REGNO_P (op0_regno))
    return ((INTVAL (operands[2]) == VECTOR_ELEMENT_MFVSRLD_64BIT)
            ? "mfvsrld %0,%x1"
            : "mfvsrd %0,%x1");

  if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
    return "fmr %0,%1";

  return "xxlor %x0,%x1,%x1";
}
  [(set_attr "type" "fp,vecsimple,mftgpr,mftgpr")
   (set_attr "length" "4")])

(define_insn "*vsx_extract_<mode>_internal2"
  [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=d,<VS_64reg>,<VS_64reg>")
        (vec_select:<VS_scalar>
         (match_operand:VSX_D 1 "vsx_register_operand" "d,wd,wd")
         (parallel [(match_operand:QI 2 "u5bit_cint_operand" "wD,wD,i")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)
   && (!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE
       || INTVAL (operands[2]) != VECTOR_ELEMENT_SCALAR_64BIT)"
{
  int fldDM;
  gcc_assert (UINTVAL (operands[2]) <= 1);

  if (INTVAL (operands[2]) == VECTOR_ELEMENT_SCALAR_64BIT)
    {
      int op0_regno = REGNO (operands[0]);
      int op1_regno = REGNO (operands[1]);

      if (op0_regno == op1_regno)
        return "nop";

      if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
        return "fmr %0,%1";

      return "xxlor %x0,%x1,%x1";
    }

  fldDM = INTVAL (operands[2]) << 1;
  if (!BYTES_BIG_ENDIAN)
    fldDM = 3 - fldDM;
  operands[3] = GEN_INT (fldDM);
  return "xxpermdi %x0,%x1,%x1,%3";
}
  [(set_attr "type" "fp,vecsimple,vecperm")
   (set_attr "length" "4")])

;; Optimize extracting a single scalar element from memory if the scalar is in
;; the correct location to use a single load.
(define_insn "*vsx_extract_<mode>_load"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "=d,wv,wr")
        (vec_select:<VS_scalar>
         (match_operand:VSX_D 1 "memory_operand" "m,Z,m")
         (parallel [(const_int 0)])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   lfd%U1%X1 %0,%1
   lxsd%U1x %x0,%y1
   ld%U1%X1 %0,%1"
  [(set_attr "type" "fpload,fpload,load")
   (set_attr "length" "4")])
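
;; For illustration (hypothetical registers/offset): extracting element 0 of
;; a V2DF that is still in memory needs no permute at all; depending on the
;; destination register class it is a plain scalar load, e.g.
;;
;;   lfd 1,0(9)        # FPR destination, first alternative above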

;; Optimize storing a single scalar element that is in the right location
;; to use a single store to memory.
(define_insn "*vsx_extract_<mode>_store"
  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,?Z")
        (vec_select:<VS_scalar>
         (match_operand:VSX_D 1 "register_operand" "d,wd,<VSa>")
         (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   stfd%U0%X0 %1,%0
   stxsd%U0x %x1,%y0
   stxsd%U0x %x1,%y0"
  [(set_attr "type" "fpstore")
   (set_attr "length" "4")])

;; Extract a SF element from V4SF
(define_insn_and_split "vsx_extract_v4sf"
  [(set (match_operand:SF 0 "vsx_register_operand" "=f,f")
        (vec_select:SF
         (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
         (parallel [(match_operand:QI 2 "u5bit_cint_operand" "O,i")])))
   (clobber (match_scratch:V4SF 3 "=X,0"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "@
   xscvspdp %x0,%x1
   #"
  ""
  [(const_int 0)]
  "
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = operands[3];
  rtx tmp;
  HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);

  if (ele == 0)
    tmp = op1;
  else
    {
      if (GET_CODE (op3) == SCRATCH)
        op3 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
      tmp = op3;
    }
  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
  DONE;
}"
  [(set_attr "length" "4,8")
   (set_attr "type" "fp")])
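
;; Sketch of the split above (hedged, big endian, placeholder element 2):
;; the wanted word is first rotated into word 0 and then converted to the
;; scalar (double precision) representation:
;;
;;   xxsldwi %x3,%x1,%x1,2    # word 2 -> word 0
;;   xscvspdp %x0,%x3         # SP value -> scalar floating point format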

;; Expand the builtin form of xxpermdi to canonical rtl.
(define_expand "vsx_xxpermdi_<mode>"
  [(match_operand:VSX_L 0 "vsx_register_operand" "")
   (match_operand:VSX_L 1 "vsx_register_operand" "")
   (match_operand:VSX_L 2 "vsx_register_operand" "")
   (match_operand:QI 3 "u5bit_cint_operand" "")]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  int mask = INTVAL (operands[3]);
  rtx perm0 = GEN_INT ((mask >> 1) & 1);
  rtx perm1 = GEN_INT ((mask & 1) + 2);
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);

  if (<MODE>mode == V2DFmode)
    gen = gen_vsx_xxpermdi2_v2df_1;
  else
    {
      gen = gen_vsx_xxpermdi2_v2di_1;
      if (<MODE>mode != V2DImode)
        {
          target = gen_lowpart (V2DImode, target);
          op0 = gen_lowpart (V2DImode, op0);
          op1 = gen_lowpart (V2DImode, op1);
        }
    }
  /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
     transformation we don't want; it is necessary for
     rs6000_expand_vec_perm_const_1 but not for this use.  So we
     prepare for that by reversing the transformation here.  */
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen (target, op0, op1, perm0, perm1));
  else
    {
      rtx p0 = GEN_INT (3 - INTVAL (perm1));
      rtx p1 = GEN_INT (3 - INTVAL (perm0));
      emit_insn (gen (target, op1, op0, p0, p1));
    }
  DONE;
})
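
;; Worked example (hedged): for __builtin_vsx_xxpermdi (a, b, 1), mask 1
;; gives perm0 = 0 and perm1 = 3, i.e. a vec_select of elements { 0, 3 }
;; from the concatenation of a and b, so the result is { a[0], b[1] } in
;; big endian element numbering.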

(define_insn "vsx_xxpermdi2_<mode>_1"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
        (vec_select:VSX_D
         (vec_concat:<VS_double>
          (match_operand:VSX_D 1 "vsx_register_operand" "wd")
          (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
         (parallel [(match_operand 3 "const_0_to_1_operand" "")
                    (match_operand 4 "const_2_to_3_operand" "")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  int op3, op4, mask;

  /* For little endian, swap operands and invert/swap selectors
     to get the correct xxpermdi.  The operand swap sets up the
     inputs as a little endian array.  The selectors are swapped
     because they are defined to use big endian ordering.  The
     selectors are inverted to get the correct doublewords for
     little endian ordering.  */
  if (BYTES_BIG_ENDIAN)
    {
      op3 = INTVAL (operands[3]);
      op4 = INTVAL (operands[4]);
    }
  else
    {
      op3 = 3 - INTVAL (operands[4]);
      op4 = 3 - INTVAL (operands[3]);
    }

  mask = (op3 << 1) | (op4 - 2);
  operands[3] = GEN_INT (mask);

  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,%3";
  else
    return "xxpermdi %x0,%x2,%x1,%3";
}
  [(set_attr "type" "vecperm")])

(define_expand "vec_perm_const<mode>"
  [(match_operand:VSX_D 0 "vsx_register_operand" "")
   (match_operand:VSX_D 1 "vsx_register_operand" "")
   (match_operand:VSX_D 2 "vsx_register_operand" "")
   (match_operand:V2DI 3 "" "")]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (rs6000_expand_vec_perm_const (operands))
    DONE;
  else
    FAIL;
})

;; Expanders for builtins
(define_expand "vsx_mergel_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
   (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
   (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtvec v;
  rtx x;

  /* Special handling for LE with -maltivec=be.  */
  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
    {
      v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
    }
  else
    {
      v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
    }

  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
  emit_insn (gen_rtx_SET (operands[0], x));
  DONE;
})

(define_expand "vsx_mergeh_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
   (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
   (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtvec v;
  rtx x;

  /* Special handling for LE with -maltivec=be.  */
  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
    {
      v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
    }
  else
    {
      v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
    }

  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
  emit_insn (gen_rtx_SET (operands[0], x));
  DONE;
})

;; V2DF/V2DI splat
(define_insn "vsx_splat_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,wd,wd,?<VSa>,?<VSa>,?<VSa>")
        (vec_duplicate:VSX_D
         (match_operand:<VS_scalar> 1 "splat_input_operand" "<VS_64reg>,f,Z,<VSa>,<VSa>,Z")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   xxpermdi %x0,%x1,%x1,0
   xxpermdi %x0,%x1,%x1,0
   lxvdsx %x0,%y1
   xxpermdi %x0,%x1,%x1,0
   xxpermdi %x0,%x1,%x1,0
   lxvdsx %x0,%y1"
  [(set_attr "type" "vecperm,vecperm,vecload,vecperm,vecperm,vecload")])
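
;; For illustration (hypothetical registers): splatting a double straight
;; from memory uses the lxvdsx alternatives, e.g.
;;
;;   lxvdsx 32,0,9        # both elements = the doubleword at (r9)
;;
;; avoiding a separate scalar load plus xxpermdi.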

;; V4SF/V4SI splat
(define_insn "vsx_xxspltw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
        (vec_duplicate:VSX_W
         (vec_select:<VS_scalar>
          (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
          (parallel
           [(match_operand:QI 2 "u5bit_cint_operand" "i,i")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (!BYTES_BIG_ENDIAN)
    operands[2] = GEN_INT (3 - INTVAL (operands[2]));

  return "xxspltw %x0,%x1,%2";
}
  [(set_attr "type" "vecperm")])
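
;; Example of the endian adjustment above (hedged): vec_splat (v, 1) on a
;; little endian target becomes "xxspltw %x0,%x1,2", since element 1 in LE
;; numbering is word 2 in the big endian word numbering the hardware uses.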

(define_insn "vsx_xxspltw_<mode>_direct"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
        (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
                       (match_operand:QI 2 "u5bit_cint_operand" "i,i")]
                      UNSPEC_VSX_XXSPLTW))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxspltw %x0,%x1,%2"
  [(set_attr "type" "vecperm")])

;; V2DF/V2DI splat for use by vec_splat builtin
(define_insn "vsx_xxspltd_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
                       (match_operand:QI 2 "u5bit_cint_operand" "i")]
                      UNSPEC_VSX_XXSPLTD))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if ((VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 0)
      || (!VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 1))
    return "xxpermdi %x0,%x1,%x1,0";
  else
    return "xxpermdi %x0,%x1,%x1,3";
}
  [(set_attr "type" "vecperm")])

;; V4SF/V4SI interleave
(define_insn "vsx_xxmrghw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
        (vec_select:VSX_W
         (vec_concat:<VS_double>
          (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
          (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
         (parallel [(const_int 0) (const_int 4)
                    (const_int 1) (const_int 5)])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxmrghw %x0,%x1,%x2";
  else
    return "xxmrglw %x0,%x2,%x1";
}
  [(set_attr "type" "vecperm")])

(define_insn "vsx_xxmrglw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
        (vec_select:VSX_W
         (vec_concat:<VS_double>
          (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
          (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
         (parallel [(const_int 2) (const_int 6)
                    (const_int 3) (const_int 7)])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxmrglw %x0,%x1,%x2";
  else
    return "xxmrghw %x0,%x2,%x1";
}
  [(set_attr "type" "vecperm")])
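
;; For illustration (hedged): on big endian, vsx_xxmrghw produces
;; { op1[0], op2[0], op1[1], op2[1] }; on little endian the same RTL is
;; implemented as xxmrglw with the operands swapped, which yields the same
;; values under the reversed element numbering.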

;; Shift left double by word immediate
(define_insn "vsx_xxsldwi_<mode>"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
        (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
                       (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
                       (match_operand:QI 3 "u5bit_cint_operand" "i")]
                      UNSPEC_VSX_SLDWI))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsldwi %x0,%x1,%x2,%3"
  [(set_attr "type" "vecperm")])

\f
;; Vector reduction insns and splitters

(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
  [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
        (VEC_reduc:V2DF
         (vec_concat:V2DF
          (vec_select:DF
           (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
           (parallel [(const_int 1)]))
          (vec_select:DF
           (match_dup 1)
           (parallel [(const_int 0)])))
         (match_dup 1)))
   (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "#"
  ""
  [(const_int 0)]
  "
{
  rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
             ? gen_reg_rtx (V2DFmode)
             : operands[2];
  emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
  DONE;
}"
  [(set_attr "length" "8")
   (set_attr "type" "veccomplex")])

(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
  [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
        (VEC_reduc:V4SF
         (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
         (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
   (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
   (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  ""
  [(const_int 0)]
  "
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp2, tmp3, tmp4;

  if (can_create_pseudo_p ())
    {
      tmp2 = gen_reg_rtx (V4SFmode);
      tmp3 = gen_reg_rtx (V4SFmode);
      tmp4 = gen_reg_rtx (V4SFmode);
    }
  else
    {
      tmp2 = operands[2];
      tmp3 = operands[3];
      tmp4 = tmp2;
    }

  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
  DONE;
}"
  [(set_attr "length" "16")
   (set_attr "type" "veccomplex")])
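
;; Worked example of the V4SF reduction above (hedged; big endian word
;; numbering, v = { v0, v1, v2, v3 }, shown for a "plus" reduction):
;;
;;   xxsldwi t2,v,v,2      # t2 = { v2, v3, v0, v1 }
;;   xvaddsp t3,t2,v       # t3 = { v0+v2, v1+v3, v0+v2, v1+v3 }
;;   xxsldwi t4,t3,t3,3    # rotate by one word
;;   xvaddsp r,t4,t3       # every element now holds v0+v1+v2+v3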

;; Combiner patterns with the vector reduction patterns that know we can get
;; to the top element of the V2DF array without doing an extract.

(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
  [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
        (vec_select:DF
         (VEC_reduc:V2DF
          (vec_concat:V2DF
           (vec_select:DF
            (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
            (parallel [(const_int 1)]))
           (vec_select:DF
            (match_dup 1)
            (parallel [(const_int 0)])))
          (match_dup 1))
         (parallel [(const_int 1)])))
   (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "#"
  ""
  [(const_int 0)]
  "
{
  rtx hi = gen_highpart (DFmode, operands[1]);
  rtx lo = (GET_CODE (operands[2]) == SCRATCH)
            ? gen_reg_rtx (DFmode)
            : operands[2];

  emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
  emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
  DONE;
}"
  [(set_attr "length" "8")
   (set_attr "type" "veccomplex")])

(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
  [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
        (vec_select:SF
         (VEC_reduc:V4SF
          (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
          (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
         (parallel [(const_int 3)])))
   (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
   (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
   (clobber (match_scratch:V4SF 4 "=0,0"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  ""
  [(const_int 0)]
  "
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp2, tmp3, tmp4, tmp5;

  if (can_create_pseudo_p ())
    {
      tmp2 = gen_reg_rtx (V4SFmode);
      tmp3 = gen_reg_rtx (V4SFmode);
      tmp4 = gen_reg_rtx (V4SFmode);
      tmp5 = gen_reg_rtx (V4SFmode);
    }
  else
    {
      tmp2 = operands[2];
      tmp3 = operands[3];
      tmp4 = tmp2;
      tmp5 = operands[4];
    }

  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
  DONE;
}"
  [(set_attr "length" "20")
   (set_attr "type" "veccomplex")])

\f
;; Power8 Vector fusion.  The fused ops must be physically adjacent.
(define_peephole
  [(set (match_operand:P 0 "base_reg_operand" "")
        (match_operand:P 1 "short_cint_operand" ""))
   (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
        (mem:VSX_M2 (plus:P (match_dup 0)
                            (match_operand:P 3 "int_reg_operand" ""))))]
  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
  "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
  [(set_attr "length" "8")
   (set_attr "type" "vecload")])

(define_peephole
  [(set (match_operand:P 0 "base_reg_operand" "")
        (match_operand:P 1 "short_cint_operand" ""))
   (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
        (mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "")
                            (match_dup 0))))]
  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
  "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
  [(set_attr "length" "8")
   (set_attr "type" "vecload")])
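
;; For illustration (hypothetical register numbers): after either peephole
;; the emitted sequence looks like
;;
;;   li 10,16                 # vector load fusion
;;   lxvd2x 32,10,9
;;
;; with the li and the indexed load kept physically adjacent so that Power8
;; can fuse them.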