2 ;; Copyright (C) 2009-2016 Free Software Foundation, Inc.
3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published
9 ;; by the Free Software Foundation; either version 3, or (at your
10 ;; option) any later version.
12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 ;; License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; Iterator for both scalar and vector floating point types supported by VSX
22 (define_mode_iterator VSX_B [DF V4SF V2DF])
24 ;; Iterator for the 2 64-bit vector types
25 (define_mode_iterator VSX_D [V2DF V2DI])
27 ;; Iterator for the 2 64-bit vector types + 128-bit types that are loaded with
28 ;; lxvd2x to properly handle swapping words on little endian
29 (define_mode_iterator VSX_LE [V2DF V2DI V1TI])
31 ;; Mode iterator to handle swapping words on little endian for the 128-bit
32 ;; types that go in a single vector register.  Each entry is guarded by a
;; C condition so the mode only participates when the target enables it.
33 (define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
34 (TF "FLOAT128_VECTOR_P (TFmode)")
35 (TI "TARGET_VSX_TIMODE")])
37 ;; Iterator for the 2 32-bit vector types
38 (define_mode_iterator VSX_W [V4SF V4SI])
40 ;; Iterator for the DF types
41 (define_mode_iterator VSX_DF [V2DF DF])
43 ;; Iterator for vector floating point types supported by VSX
44 (define_mode_iterator VSX_F [V4SF V2DF])
46 ;; Iterator for logical types supported by VSX
;; NOTE(review): the embedded numbering jumps from 47 to 55 here, so the
;; mode entries between V16QI and KF appear to be missing from this copy --
;; restore them from the upstream file before building.
47 (define_mode_iterator VSX_L [V16QI
55 (KF "FLOAT128_VECTOR_P (KFmode)")
56 (TF "FLOAT128_VECTOR_P (TFmode)")])
58 ;; Iterator for memory move. Handle TImode specially to allow
59 ;; it to use gprs as well as vsx registers.
;; NOTE(review): interior mode entries (numbering 61-66) look truncated here.
60 (define_mode_iterator VSX_M [V16QI
67 (KF "FLOAT128_VECTOR_P (KFmode)")
68 (TF "FLOAT128_VECTOR_P (TFmode)")])
;; Like VSX_M, but also allowing TImode when TARGET_VSX_TIMODE is set.
;; NOTE(review): interior mode entries (numbering 71-76) look truncated here.
70 (define_mode_iterator VSX_M2 [V16QI
77 (KF "FLOAT128_VECTOR_P (KFmode)")
78 (TF "FLOAT128_VECTOR_P (TFmode)")
79 (TI "TARGET_VSX_TIMODE")])
;; NOTE(review): throughout this attribute section the embedded numbering is
;; discontinuous (e.g. 82->94, 109->123), so most mode->value mappings are
;; truncated to their first entry in this copy; the closing brackets of the
;; lists are also missing.  Restore from upstream before use.
81 ;; Map into the appropriate load/store name based on the type
82 (define_mode_attr VSm [(V16QI "vw4")
94 ;; Map into the appropriate suffix based on the type
95 (define_mode_attr VSs [(V16QI "sp")
108 ;; Map the register class used
109 (define_mode_attr VSr [(V16QI "v")
123 ;; Map the register class used for float<->int conversions (floating point side)
124 ;; VSr2 is the preferred register class, VSr3 is any register class that will
;; hold the value (the continuation of this sentence is missing in this copy).
126 (define_mode_attr VSr2 [(V2DF "wd")
132 (define_mode_attr VSr3 [(V2DF "wa")
138 ;; Map the register class for sp<->dp float conversions, destination
139 (define_mode_attr VSr4 [(SF "ws")
144 ;; Map the register class for sp<->dp float conversions, source
145 (define_mode_attr VSr5 [(SF "ws")
150 ;; The VSX register class that a type can occupy, even if it is not the
151 ;; preferred register class (VSr is the preferred register class that will get
;; the value first; the continuation of this sentence is missing in this copy).
153 (define_mode_attr VSa [(V16QI "wa")
167 ;; Same size integer type for floating point data
168 (define_mode_attr VSi [(V4SF "v4si")
;; Upper-case variant of VSi, used where a mode name (not a builtin suffix)
;; is substituted.
172 (define_mode_attr VSI [(V4SF "V4SI")
176 ;; Word size for same size conversion
177 (define_mode_attr VSc [(V4SF "w")
181 ;; Map into either s or v, depending on whether this is a scalar or vector
;; operation (used to build the xs.../xv... mnemonic prefix).
183 (define_mode_attr VSv [(V16QI "v")
193 ;; Appropriate type for add ops (and other simple FP ops)
194 (define_mode_attr VStype_simple [(V2DF "vecdouble")
198 (define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
202 ;; Appropriate type for multiply ops
203 (define_mode_attr VStype_mul [(V2DF "vecdouble")
207 (define_mode_attr VSfptype_mul [(V2DF "fp_mul_d")
211 ;; Appropriate type for divide ops.
212 (define_mode_attr VStype_div [(V2DF "vecdiv")
216 (define_mode_attr VSfptype_div [(V2DF "fp_div_d")
220 ;; Appropriate type for sqrt ops. For now, just lump the vector sqrt with
;; the scalar version (the continuation of this sentence is missing here).
222 (define_mode_attr VStype_sqrt [(V2DF "dsqrt")
226 (define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
230 ;; Iterator and modes for sp<->dp conversions
231 ;; Because scalar SF values are represented internally as double, use the
232 ;; V4SF type to represent this rather than SF.
233 (define_mode_iterator VSX_SPDP [DF V4SF V2DF])
;; Result mode, instruction mnemonic, and scheduling type for each
;; single<->double conversion.  NOTE(review): only the first entry of each
;; mapping survives in this copy; the rest are truncated.
235 (define_mode_attr VS_spdp_res [(DF "V4SF")
239 (define_mode_attr VS_spdp_insn [(DF "xscvdpsp")
243 (define_mode_attr VS_spdp_type [(DF "fp")
247 ;; Map the scalar mode for a vector type
248 (define_mode_attr VS_scalar [(V1TI "TI")
256 ;; Map to a double-sized vector mode
257 (define_mode_attr VS_double [(V4SI "V8SI")
263 ;; Map register class for 64-bit element in 128-bit vector for direct moves
;; (vector <-> GPR transfers).
265 (define_mode_attr VS_64dm [(V2DF "wk")
268 ;; Map register class for 64-bit element in 128-bit vector for normal register
;; to register moves (the continuation of this sentence is missing here).
270 (define_mode_attr VS_64reg [(V2DF "ws")
273 ;; Constants for creating unspecs
;; NOTE(review): numbering jumps 274->300 -- most enum members are missing
;; from this copy, and the closing paren of the enum is not visible.
274 (define_c_enum "unspec"
300 UNSPEC_VSX_XVCVDPSXDS
301 UNSPEC_VSX_XVCVDPUXDS
;; ISA 3.0 (Power9) vector load/store patterns for all VSX_M modes.
;; NOTE(review): the insn condition string and output template (numbering
;; 309-310 and 317-318) are missing from this copy.
306 (define_insn "*p9_vecload_<mode>"
307 [(set (match_operand:VSX_M 0 "vsx_register_operand" "=<VSa>")
308 (match_operand:VSX_M 1 "memory_operand" "Z"))]
311 [(set_attr "type" "vecload")
312 (set_attr "length" "4")])
314 (define_insn "*p9_vecstore_<mode>"
315 [(set (match_operand:VSX_M 0 "memory_operand" "=Z")
316 (match_operand:VSX_M 1 "vsx_register_operand" "<VSa>"))]
319 [(set_attr "type" "vecstore")
320 (set_attr "length" "4")])
324 ;; The patterns for LE permuted loads and stores come before the general
325 ;; VSX moves so they match first.
;; Each of these define_insn_and_split pairs matches a little-endian load
;; without Power9 support and splits it into an lxvd2x-style load followed
;; by a vec_select that swaps the doublewords back into element order.
;; NOTE(review): interior lines of each split (the output template, the
;; vec_select wrappers, and the tail of the preparation C code) are missing
;; from this copy, as the numbering gaps show.
326 (define_insn_and_split "*vsx_le_perm_load_<mode>"
327 [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
328 (match_operand:VSX_LE 1 "memory_operand" "Z"))]
329 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
331 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
335 (parallel [(const_int 1) (const_int 0)])))
339 (parallel [(const_int 1) (const_int 0)])))]
342 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
346 [(set_attr "type" "vecload")
347 (set_attr "length" "8")])
;; Same idea for the 32-bit element vectors: a 4-element swap (2 3 0 1).
349 (define_insn_and_split "*vsx_le_perm_load_<mode>"
350 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
351 (match_operand:VSX_W 1 "memory_operand" "Z"))]
352 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
354 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
358 (parallel [(const_int 2) (const_int 3)
359 (const_int 0) (const_int 1)])))
363 (parallel [(const_int 2) (const_int 3)
364 (const_int 0) (const_int 1)])))]
367 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
371 [(set_attr "type" "vecload")
372 (set_attr "length" "8")])
;; V8HI variant: 8-element halfword swap (4..7 0..3).
374 (define_insn_and_split "*vsx_le_perm_load_v8hi"
375 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
376 (match_operand:V8HI 1 "memory_operand" "Z"))]
377 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
379 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
383 (parallel [(const_int 4) (const_int 5)
384 (const_int 6) (const_int 7)
385 (const_int 0) (const_int 1)
386 (const_int 2) (const_int 3)])))
390 (parallel [(const_int 4) (const_int 5)
391 (const_int 6) (const_int 7)
392 (const_int 0) (const_int 1)
393 (const_int 2) (const_int 3)])))]
396 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
400 [(set_attr "type" "vecload")
401 (set_attr "length" "8")])
;; V16QI variant: 16-element byte swap (8..15 0..7).
403 (define_insn_and_split "*vsx_le_perm_load_v16qi"
404 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
405 (match_operand:V16QI 1 "memory_operand" "Z"))]
406 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
408 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
412 (parallel [(const_int 8) (const_int 9)
413 (const_int 10) (const_int 11)
414 (const_int 12) (const_int 13)
415 (const_int 14) (const_int 15)
416 (const_int 0) (const_int 1)
417 (const_int 2) (const_int 3)
418 (const_int 4) (const_int 5)
419 (const_int 6) (const_int 7)])))
423 (parallel [(const_int 8) (const_int 9)
424 (const_int 10) (const_int 11)
425 (const_int 12) (const_int 13)
426 (const_int 14) (const_int 15)
427 (const_int 0) (const_int 1)
428 (const_int 2) (const_int 3)
429 (const_int 4) (const_int 5)
430 (const_int 6) (const_int 7)])))]
433 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
437 [(set_attr "type" "vecload")
438 (set_attr "length" "8")])
;; Little-endian permuted store for the 64-bit element vectors: the register
;; is doubleword-swapped first, then stored with an stxvd2x-style store.
;; The "+" on operand 1 marks it as read/write because the post-reload split
;; below temporarily swaps it in place.  NOTE(review): output templates and
;; the vec_select/set wrappers inside the splits are missing from this copy.
440 (define_insn "*vsx_le_perm_store_<mode>"
441 [(set (match_operand:VSX_LE 0 "memory_operand" "=Z")
442 (match_operand:VSX_LE 1 "vsx_register_operand" "+<VSa>"))]
443 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
445 [(set_attr "type" "vecstore")
446 (set_attr "length" "12")])
;; Pre-reload split: swap into a fresh pseudo, then store.
449 [(set (match_operand:VSX_LE 0 "memory_operand" "")
450 (match_operand:VSX_LE 1 "vsx_register_operand" ""))]
451 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
455 (parallel [(const_int 1) (const_int 0)])))
459 (parallel [(const_int 1) (const_int 0)])))]
461 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
465 ;; The post-reload split requires that we re-permute the source
466 ;; register in case it is still live.
468 [(set (match_operand:VSX_LE 0 "memory_operand" "")
469 (match_operand:VSX_LE 1 "vsx_register_operand" ""))]
470 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
474 (parallel [(const_int 1) (const_int 0)])))
478 (parallel [(const_int 1) (const_int 0)])))
482 (parallel [(const_int 1) (const_int 0)])))]
;; Little-endian permuted store for 32-bit element vectors (word swap
;; 2 3 0 1), with the same pre-reload and post-reload split structure as
;; the VSX_LE version above.  NOTE(review): output templates and the
;; surrounding vec_select/set wrappers are missing from this copy.
485 (define_insn "*vsx_le_perm_store_<mode>"
486 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
487 (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
488 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
490 [(set_attr "type" "vecstore")
491 (set_attr "length" "12")])
;; Pre-reload split: swap into a fresh pseudo, then store.
494 [(set (match_operand:VSX_W 0 "memory_operand" "")
495 (match_operand:VSX_W 1 "vsx_register_operand" ""))]
496 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
500 (parallel [(const_int 2) (const_int 3)
501 (const_int 0) (const_int 1)])))
505 (parallel [(const_int 2) (const_int 3)
506 (const_int 0) (const_int 1)])))]
508 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
512 ;; The post-reload split requires that we re-permute the source
513 ;; register in case it is still live.
515 [(set (match_operand:VSX_W 0 "memory_operand" "")
516 (match_operand:VSX_W 1 "vsx_register_operand" ""))]
517 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
521 (parallel [(const_int 2) (const_int 3)
522 (const_int 0) (const_int 1)])))
526 (parallel [(const_int 2) (const_int 3)
527 (const_int 0) (const_int 1)])))
531 (parallel [(const_int 2) (const_int 3)
532 (const_int 0) (const_int 1)])))]
;; Little-endian permuted store for V8HI (halfword swap 4..7 0..3), same
;; three-part structure: insn, pre-reload split, post-reload split.
;; NOTE(review): output templates and the vec_select/set wrappers inside
;; the splits are missing from this copy.
535 (define_insn "*vsx_le_perm_store_v8hi"
536 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
537 (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
538 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
540 [(set_attr "type" "vecstore")
541 (set_attr "length" "12")])
;; Pre-reload split: swap into a fresh pseudo, then store.
544 [(set (match_operand:V8HI 0 "memory_operand" "")
545 (match_operand:V8HI 1 "vsx_register_operand" ""))]
546 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
550 (parallel [(const_int 4) (const_int 5)
551 (const_int 6) (const_int 7)
552 (const_int 0) (const_int 1)
553 (const_int 2) (const_int 3)])))
557 (parallel [(const_int 4) (const_int 5)
558 (const_int 6) (const_int 7)
559 (const_int 0) (const_int 1)
560 (const_int 2) (const_int 3)])))]
562 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
566 ;; The post-reload split requires that we re-permute the source
567 ;; register in case it is still live.
569 [(set (match_operand:V8HI 0 "memory_operand" "")
570 (match_operand:V8HI 1 "vsx_register_operand" ""))]
571 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
575 (parallel [(const_int 4) (const_int 5)
576 (const_int 6) (const_int 7)
577 (const_int 0) (const_int 1)
578 (const_int 2) (const_int 3)])))
582 (parallel [(const_int 4) (const_int 5)
583 (const_int 6) (const_int 7)
584 (const_int 0) (const_int 1)
585 (const_int 2) (const_int 3)])))
589 (parallel [(const_int 4) (const_int 5)
590 (const_int 6) (const_int 7)
591 (const_int 0) (const_int 1)
592 (const_int 2) (const_int 3)])))]
;; Little-endian permuted store for V16QI (byte swap 8..15 0..7), same
;; three-part structure: insn, pre-reload split, post-reload split.
;; NOTE(review): output templates and the vec_select/set wrappers inside
;; the splits are missing from this copy.
595 (define_insn "*vsx_le_perm_store_v16qi"
596 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
597 (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
598 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
600 [(set_attr "type" "vecstore")
601 (set_attr "length" "12")])
;; Pre-reload split: swap into a fresh pseudo, then store.
604 [(set (match_operand:V16QI 0 "memory_operand" "")
605 (match_operand:V16QI 1 "vsx_register_operand" ""))]
606 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
610 (parallel [(const_int 8) (const_int 9)
611 (const_int 10) (const_int 11)
612 (const_int 12) (const_int 13)
613 (const_int 14) (const_int 15)
614 (const_int 0) (const_int 1)
615 (const_int 2) (const_int 3)
616 (const_int 4) (const_int 5)
617 (const_int 6) (const_int 7)])))
621 (parallel [(const_int 8) (const_int 9)
622 (const_int 10) (const_int 11)
623 (const_int 12) (const_int 13)
624 (const_int 14) (const_int 15)
625 (const_int 0) (const_int 1)
626 (const_int 2) (const_int 3)
627 (const_int 4) (const_int 5)
628 (const_int 6) (const_int 7)])))]
630 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
634 ;; The post-reload split requires that we re-permute the source
635 ;; register in case it is still live.
637 [(set (match_operand:V16QI 0 "memory_operand" "")
638 (match_operand:V16QI 1 "vsx_register_operand" ""))]
639 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
643 (parallel [(const_int 8) (const_int 9)
644 (const_int 10) (const_int 11)
645 (const_int 12) (const_int 13)
646 (const_int 14) (const_int 15)
647 (const_int 0) (const_int 1)
648 (const_int 2) (const_int 3)
649 (const_int 4) (const_int 5)
650 (const_int 6) (const_int 7)])))
654 (parallel [(const_int 8) (const_int 9)
655 (const_int 10) (const_int 11)
656 (const_int 12) (const_int 13)
657 (const_int 14) (const_int 15)
658 (const_int 0) (const_int 1)
659 (const_int 2) (const_int 3)
660 (const_int 4) (const_int 5)
661 (const_int 6) (const_int 7)])))
665 (parallel [(const_int 8) (const_int 9)
666 (const_int 10) (const_int 11)
667 (const_int 12) (const_int 13)
668 (const_int 14) (const_int 15)
669 (const_int 0) (const_int 1)
670 (const_int 2) (const_int 3)
671 (const_int 4) (const_int 5)
672 (const_int 6) (const_int 7)])))]
675 ;; Little endian word swapping for 128-bit types that are either scalars or the
676 ;; special V1TI container class, for which it is not appropriate to use
;; vec_select (this sentence continues on a line missing from this copy).
;; The register/register alternative is an xxpermdi doubleword swap; the
;; memory alternatives map to the lxvd2x/stxvd2x style swap load/store.
;; NOTE(review): the middle of the pattern (the rotate RTL, numbering
;; 680/682, and the remaining template lines 686-687) is missing here.
678 (define_insn "*vsx_le_permute_<mode>"
679 [(set (match_operand:VSX_LE_128 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
681 (match_operand:VSX_LE_128 1 "input_operand" "<VSa>,Z,<VSa>")
683 "!BYTES_BIG_ENDIAN && TARGET_VSX"
685 xxpermdi %x0,%x1,%x1,2
688 [(set_attr "length" "4")
689 (set_attr "type" "vecperm,vecload,vecstore")])
;; Two consecutive doubleword swaps cancel out; this pattern recognizes the
;; pair and splits it into a plain register copy, which becomes a no-op
;; (deleted via NOTE_INSN_DELETED) when source and destination coincide
;; after reload.
691 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
692 [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,<VSa>")
695 (match_operand:VSX_LE_128 1 "vsx_register_operand" "0,<VSa>")
698 "!BYTES_BIG_ENDIAN && TARGET_VSX"
703 [(set (match_dup 0) (match_dup 1))]
705 if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
707 emit_note (NOTE_INSN_DELETED);
711 [(set_attr "length" "0,4")
712 (set_attr "type" "vecsimple")])
;; Little-endian load/store for the single-register 128-bit scalar modes
;; (KF/TF/TI).  These use rotate-by-64 RTL rather than vec_select to express
;; the doubleword swap.  NOTE(review): interior lines (output templates,
;; rotate amounts, and the tails of the preparation C code) are missing
;; from this copy.
714 (define_insn_and_split "*vsx_le_perm_load_<mode>"
715 [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>")
716 (match_operand:VSX_LE_128 1 "memory_operand" "Z"))]
717 "!BYTES_BIG_ENDIAN && TARGET_VSX"
719 "!BYTES_BIG_ENDIAN && TARGET_VSX"
721 (rotate:VSX_LE_128 (match_dup 1)
724 (rotate:VSX_LE_128 (match_dup 2)
728 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
732 [(set_attr "type" "vecload")
733 (set_attr "length" "8")])
;; Store counterpart; operand 1 is "+" because the post-reload split (further
;; below) swaps it in place.
735 (define_insn "*vsx_le_perm_store_<mode>"
736 [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z")
737 (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>"))]
738 "!BYTES_BIG_ENDIAN && TARGET_VSX"
740 [(set_attr "type" "vecstore")
741 (set_attr "length" "12")])
;; Pre-reload split of the store: rotate into a fresh pseudo, then store.
744 [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
745 (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
746 "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
748 (rotate:VSX_LE_128 (match_dup 1)
751 (rotate:VSX_LE_128 (match_dup 2)
754 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
758 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
759 ;; VSX registers on a little endian system. The vector types and IEEE 128-bit
760 ;; floating point are handled by the more generic swap elimination pass.
;; Two back-to-back rotates through operand 0 collapse to a single copy when
;; operand 0 is either the same register or dead after the second insn.
762 [(set (match_operand:TI 0 "vsx_register_operand" "")
763 (rotate:TI (match_operand:TI 1 "vsx_register_operand" "")
765 (set (match_operand:TI 2 "vsx_register_operand" "")
766 (rotate:TI (match_dup 0)
768 "!BYTES_BIG_ENDIAN && TARGET_VSX && TARGET_VSX_TIMODE
769 && (rtx_equal_p (operands[0], operands[2])
770 || peep2_reg_dead_p (2, operands[0]))"
771 [(set (match_dup 2) (match_dup 1))])
773 ;; The post-reload split requires that we re-permute the source
774 ;; register in case it is still live.
;; Sequence: swap operand 1 in place, store it, swap it back.
;; NOTE(review): the rotate amounts and intervening set wrappers are missing
;; from this copy (numbering gaps 778->780 etc.).
776 [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
777 (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
778 "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
780 (rotate:VSX_LE_128 (match_dup 1)
783 (rotate:VSX_LE_128 (match_dup 1)
786 (rotate:VSX_LE_128 (match_dup 1)
;; General 128-bit VSX move.  The alternatives cover VSX store/load/copy,
;; GPR moves, GPR load/store, constants, and Altivec forms; the actual
;; mnemonic is chosen at output time by rs6000_output_move_128bit.
790 (define_insn "*vsx_mov<mode>"
791 [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?<VSa>,?<VSa>,r,we,wQ,?&r,??Y,??r,??r,<VSr>,?<VSa>,*r,v,wZ,v")
792 (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,<VSa>,Z,<VSa>,we,b,r,wQ,r,Y,r,j,j,j,W,v,wZ"))]
793 "VECTOR_MEM_VSX_P (<MODE>mode)
794 && (register_operand (operands[0], <MODE>mode)
795 || register_operand (operands[1], <MODE>mode))"
797 return rs6000_output_move_128bit (operands);
799 [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,mffgpr,mftgpr,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload")
800 (set_attr "length" "4,4,4,4,4,4,8,4,12,12,12,12,16,4,4,*,16,4,4")])
802 ;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal
803 ;; use of TImode is for unions. However for plain data movement, slightly
804 ;; favor the vector loads
805 (define_insn "*vsx_movti_64bit"
806 [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,r,we,v,v,wZ,wQ,&r,Y,r,r,?r")
807 (match_operand:TI 1 "input_operand" "wa,Z,wa,O,we,b,W,wZ,v,r,wQ,r,Y,r,n"))]
808 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
809 && (register_operand (operands[0], TImode)
810 || register_operand (operands[1], TImode))"
;; All alternatives defer mnemonic selection to the common 128-bit helper.
812 return rs6000_output_move_128bit (operands);
814 [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,mffgpr,mftgpr,vecsimple,vecstore,vecload,store,load,store,load,*,*")
815 (set_attr "length" "4,4,4,4,8,4,16,4,4,8,8,8,8,8,8")])
;; 32-bit TImode move: hand-written switch over alternatives, including the
;; legacy string instructions (stswi/lswi) for the update-form memory cases.
;; NOTE(review): several case labels, braces, and alternative bodies of the
;; switch are missing from this copy (numbering gaps 842->849, 860->870).
817 (define_insn "*vsx_movti_32bit"
818 [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r")
819 (match_operand:TI 1 "input_operand" "wa, Z,wa, O,W,wZ, v,r,r, Q, Y, r,n"))]
820 "! TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
821 && (register_operand (operands[0], TImode)
822 || register_operand (operands[1], TImode))"
824 switch (which_alternative)
827 return "stxvd2x %x1,%y0";
830 return "lxvd2x %x0,%y1";
833 return "xxlor %x0,%x1,%x1";
836 return "xxlxor %x0,%x0,%x0";
839 return output_vec_const_move (operands);
842 return "stvx %1,%y0";
849 return \"stswi %1,%P0,16\";
855 /* If the address is not used in the output, we can use lsi.  Otherwise,
856 fall through to generating four loads.  */
858 && ! reg_overlap_mentioned_p (operands[0], operands[1]))
859 return \"lswi %0,%P1,16\";
860 /* ... fall through ... */
870 [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,store,load,load, *, *")
871 (set_attr "update" " *, *, *, *, *, *, *, yes, yes, yes, yes, *, *")
872 (set_attr "length" " 4, 4, 4, 4, 8, 4, 4, 16, 16, 16, 16,16,16")
873 (set (attr "cell_micro") (if_then_else (match_test "TARGET_STRING")
874 (const_string "always")
875 (const_string "conditional")))])
877 ;; Explicit load/store expanders for the builtin functions
;; These simply re-emit the set; the move patterns above do the real work.
;; NOTE(review): the (empty) preparation bodies, numbering 882-883 and
;; 888-889, are missing from this copy.
878 (define_expand "vsx_load_<mode>"
879 [(set (match_operand:VSX_M 0 "vsx_register_operand" "")
880 (match_operand:VSX_M 1 "memory_operand" ""))]
881 "VECTOR_MEM_VSX_P (<MODE>mode)"
884 (define_expand "vsx_store_<mode>"
885 [(set (match_operand:VSX_M 0 "memory_operand" "")
886 (match_operand:VSX_M 1 "vsx_register_operand" ""))]
887 "VECTOR_MEM_VSX_P (<MODE>mode)"
891 ;; VSX vector floating point arithmetic instructions. The VSX scalar
892 ;; instructions are now combined with the insn for the traditional floating
;; point unit (this sentence continues on a line missing from this copy).
894 (define_insn "*vsx_add<mode>3"
895 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
896 (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
897 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
898 "VECTOR_UNIT_VSX_P (<MODE>mode)"
899 "xvadd<VSs> %x0,%x1,%x2"
900 [(set_attr "type" "<VStype_simple>")
901 (set_attr "fp_type" "<VSfptype_simple>")])
903 (define_insn "*vsx_sub<mode>3"
904 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
905 (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
906 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
907 "VECTOR_UNIT_VSX_P (<MODE>mode)"
908 "xvsub<VSs> %x0,%x1,%x2"
909 [(set_attr "type" "<VStype_simple>")
910 (set_attr "fp_type" "<VSfptype_simple>")])
912 (define_insn "*vsx_mul<mode>3"
913 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
914 (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
915 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
916 "VECTOR_UNIT_VSX_P (<MODE>mode)"
917 "xvmul<VSs> %x0,%x1,%x2"
918 [(set_attr "type" "<VStype_simple>")
919 (set_attr "fp_type" "<VSfptype_mul>")])
921 ; Emulate vector with scalar for vec_mul in V2DImode
; Split strategy: extract both doubleword elements of each input, do two
; scalar muldi3 multiplies, and concatenate the two results back into a
; V2DI register.  op5 holds element 0's product while op3 is reused for
; element 1.  NOTE(review): the unspec name, the "#" template, and the
; closing DONE/brace lines are missing from this copy.
922 (define_insn_and_split "vsx_mul_v2di"
923 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
924 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
925 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
927 "VECTOR_MEM_VSX_P (V2DImode)"
929 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
933 rtx op0 = operands[0];
934 rtx op1 = operands[1];
935 rtx op2 = operands[2];
936 rtx op3 = gen_reg_rtx (DImode);
937 rtx op4 = gen_reg_rtx (DImode);
938 rtx op5 = gen_reg_rtx (DImode);
939 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
940 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
941 emit_insn (gen_muldi3 (op5, op3, op4));
942 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
943 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
944 emit_insn (gen_muldi3 (op3, op3, op4));
945 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
948 [(set_attr "type" "mul")])
;; Vector FP divide for V4SF/V2DF.
950 (define_insn "*vsx_div<mode>3"
951 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
952 (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
953 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
954 "VECTOR_UNIT_VSX_P (<MODE>mode)"
955 "xvdiv<VSs> %x0,%x1,%x2"
956 [(set_attr "type" "<VStype_div>")
957 (set_attr "fp_type" "<VSfptype_div>")])
959 ; Emulate vector with scalar for vec_div in V2DImode
; Same element-wise strategy as vsx_mul_v2di above, but with signed
; divdi3.  NOTE(review): the unspec name, the "#" template, and the
; closing DONE/brace lines are missing from this copy.
960 (define_insn_and_split "vsx_div_v2di"
961 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
962 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
963 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
965 "VECTOR_MEM_VSX_P (V2DImode)"
967 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
971 rtx op0 = operands[0];
972 rtx op1 = operands[1];
973 rtx op2 = operands[2];
974 rtx op3 = gen_reg_rtx (DImode);
975 rtx op4 = gen_reg_rtx (DImode);
976 rtx op5 = gen_reg_rtx (DImode);
977 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
978 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
979 emit_insn (gen_divdi3 (op5, op3, op4));
980 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
981 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
982 emit_insn (gen_divdi3 (op3, op3, op4));
983 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
986 [(set_attr "type" "div")])
; Unsigned variant of vsx_div_v2di: identical element-wise split but using
; udivdi3.  NOTE(review): the unspec name, the "#" template, and the
; closing DONE/brace lines are missing from this copy.
988 (define_insn_and_split "vsx_udiv_v2di"
989 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
990 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
991 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
993 "VECTOR_MEM_VSX_P (V2DImode)"
995 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
999 rtx op0 = operands[0];
1000 rtx op1 = operands[1];
1001 rtx op2 = operands[2];
1002 rtx op3 = gen_reg_rtx (DImode);
1003 rtx op4 = gen_reg_rtx (DImode);
1004 rtx op5 = gen_reg_rtx (DImode);
1005 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1006 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1007 emit_insn (gen_udivdi3 (op5, op3, op4));
1008 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1009 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1010 emit_insn (gen_udivdi3 (op3, op3, op4));
1011 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1014 [(set_attr "type" "div")])
1016 ;; *tdiv* instruction returning the FG flag
;; Expands to the internal tdiv pattern (match 3 = fresh CCFP pseudo) and a
;; compare of the GT bit into a GPR.  NOTE(review): the set wrapper around
;; match_dup 3, the unspec name, and the trailing const/brace lines are
;; missing from this copy.
1017 (define_expand "vsx_tdiv<mode>3_fg"
1019 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
1020 (match_operand:VSX_B 2 "vsx_register_operand" "")]
1022 (set (match_operand:SI 0 "gpc_reg_operand" "")
1023 (gt:SI (match_dup 3)
1025 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1027 operands[3] = gen_reg_rtx (CCFPmode);
1030 ;; *tdiv* instruction returning the FE flag
;; Same as the FG expander but tests the EQ bit.
1031 (define_expand "vsx_tdiv<mode>3_fe"
1033 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
1034 (match_operand:VSX_B 2 "vsx_register_operand" "")]
1036 (set (match_operand:SI 0 "gpc_reg_operand" "")
1037 (eq:SI (match_dup 3)
1039 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1041 operands[3] = gen_reg_rtx (CCFPmode);
;; The insn both expanders feed: xstdivdp/xvtdivsp/xvtdivdp via VSv/VSs.
1044 (define_insn "*vsx_tdiv<mode>3_internal"
1045 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1046 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
1047 (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
1049 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1050 "x<VSv>tdiv<VSs> %0,%x1,%x2"
1051 [(set_attr "type" "<VStype_simple>")
1052 (set_attr "fp_type" "<VSfptype_simple>")])
;; Reciprocal estimate.  NOTE(review): the unspec name (line numbering 1057)
;; and the output template line 1059 are missing from this copy.
1054 (define_insn "vsx_fre<mode>2"
1055 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1056 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1058 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1060 [(set_attr "type" "<VStype_simple>")
1061 (set_attr "fp_type" "<VSfptype_simple>")])
;; Vector negate.
1063 (define_insn "*vsx_neg<mode>2"
1064 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1065 (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1066 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1067 "xvneg<VSs> %x0,%x1"
1068 [(set_attr "type" "<VStype_simple>")
1069 (set_attr "fp_type" "<VSfptype_simple>")])
;; Vector absolute value.
1071 (define_insn "*vsx_abs<mode>2"
1072 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1073 (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1074 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1075 "xvabs<VSs> %x0,%x1"
1076 [(set_attr "type" "<VStype_simple>")
1077 (set_attr "fp_type" "<VSfptype_simple>")])
;; Negated absolute value (the neg/abs wrapper lines, numbering 1081-1082,
;; are missing from this copy).
1079 (define_insn "vsx_nabs<mode>2"
1080 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1083 (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
1084 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1085 "xvnabs<VSs> %x0,%x1"
1086 [(set_attr "type" "<VStype_simple>")
1087 (set_attr "fp_type" "<VSfptype_simple>")])
;; Signed maximum / minimum.
1089 (define_insn "vsx_smax<mode>3"
1090 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1091 (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1092 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1093 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1094 "xvmax<VSs> %x0,%x1,%x2"
1095 [(set_attr "type" "<VStype_simple>")
1096 (set_attr "fp_type" "<VSfptype_simple>")])
1098 (define_insn "*vsx_smin<mode>3"
1099 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1100 (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1101 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1102 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1103 "xvmin<VSs> %x0,%x1,%x2"
1104 [(set_attr "type" "<VStype_simple>")
1105 (set_attr "fp_type" "<VSfptype_simple>")])
;; Vector square root.
1107 (define_insn "*vsx_sqrt<mode>2"
1108 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1109 (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1110 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1111 "xvsqrt<VSs> %x0,%x1"
1112 [(set_attr "type" "<VStype_sqrt>")
1113 (set_attr "fp_type" "<VSfptype_sqrt>")])
;; Reciprocal square root estimate (the unspec name, numbering 1118, is
;; missing from this copy).
1115 (define_insn "*vsx_rsqrte<mode>2"
1116 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1117 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1119 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1120 "xvrsqrte<VSs> %x0,%x1"
1121 [(set_attr "type" "<VStype_simple>")
1122 (set_attr "fp_type" "<VSfptype_simple>")])
1124 ;; *tsqrt* returning the fg flag
;; Mirrors the tdiv expanders: tsqrt into a fresh CCFP pseudo, then compare
;; the GT (fg) bit into a GPR.  NOTE(review): set wrappers, unspec names,
;; and trailing const/brace lines are missing from this copy.
1125 (define_expand "vsx_tsqrt<mode>2_fg"
1127 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
1129 (set (match_operand:SI 0 "gpc_reg_operand" "")
1130 (gt:SI (match_dup 3)
1132 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1134 operands[3] = gen_reg_rtx (CCFPmode);
1137 ;; *tsqrt* returning the fe flag
1138 (define_expand "vsx_tsqrt<mode>2_fe"
1140 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
1142 (set (match_operand:SI 0 "gpc_reg_operand" "")
1143 (eq:SI (match_dup 3)
1145 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1147 operands[3] = gen_reg_rtx (CCFPmode);
;; The insn both tsqrt expanders feed.
1150 (define_insn "*vsx_tsqrt<mode>2_internal"
1151 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1152 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1154 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1155 "x<VSv>tsqrt<VSs> %0,%x1"
1156 [(set_attr "type" "<VStype_simple>")
1157 (set_attr "fp_type" "<VSfptype_simple>")])
1159 ;; Fused vector multiply/add instructions. Support the classical Altivec
1160 ;; versions of fma, which allows the target to be a separate register from the
1161 ;; 3 inputs. Under VSX, the target must be either the addend or the first
;; multiplicand (this sentence continues on a line missing from this copy).
;; NOTE(review): in every pattern below, the fma/neg RTL wrapper lines are
;; missing (numbering gaps such as 1165->1167 and 1225->1228), as are the
;; opening "@" of some multi-alternative templates.
1164 (define_insn "*vsx_fmav4sf4"
1165 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1167 (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1168 (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1169 (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
1170 "VECTOR_UNIT_VSX_P (V4SFmode)"
1172 xvmaddasp %x0,%x1,%x2
1173 xvmaddmsp %x0,%x1,%x3
1174 xvmaddasp %x0,%x1,%x2
1175 xvmaddmsp %x0,%x1,%x3
1176 vmaddfp %0,%1,%2,%3"
1177 [(set_attr "type" "vecfloat")])
;; V2DF fma: no Altivec fallback alternative (no vmaddfp for doubles).
1179 (define_insn "*vsx_fmav2df4"
1180 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1182 (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1183 (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1184 (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
1185 "VECTOR_UNIT_VSX_P (V2DFmode)"
1187 xvmaddadp %x0,%x1,%x2
1188 xvmaddmdp %x0,%x1,%x3
1189 xvmaddadp %x0,%x1,%x2
1190 xvmaddmdp %x0,%x1,%x3"
1191 [(set_attr "type" "vecdouble")])
;; Fused multiply-subtract (fma with negated addend).
1193 (define_insn "*vsx_fms<mode>4"
1194 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1196 (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
1197 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1199 (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1200 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1202 xvmsuba<VSs> %x0,%x1,%x2
1203 xvmsubm<VSs> %x0,%x1,%x3
1204 xvmsuba<VSs> %x0,%x1,%x2
1205 xvmsubm<VSs> %x0,%x1,%x3"
1206 [(set_attr "type" "<VStype_mul>")]) 
;; Negated fused multiply-add.
1208 (define_insn "*vsx_nfma<mode>4"
1209 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1212 (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
1213 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1214 (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1215 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1217 xvnmadda<VSs> %x0,%x1,%x2
1218 xvnmaddm<VSs> %x0,%x1,%x3
1219 xvnmadda<VSs> %x0,%x1,%x2
1220 xvnmaddm<VSs> %x0,%x1,%x3"
1221 [(set_attr "type" "<VStype_mul>")
1222 (set_attr "fp_type" "<VSfptype_mul>")])
;; Negated fused multiply-subtract; V4SF keeps the Altivec vnmsubfp
;; alternative.
1224 (define_insn "*vsx_nfmsv4sf4"
1225 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1228 (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1229 (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1231 (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
1232 "VECTOR_UNIT_VSX_P (V4SFmode)"
1234 xvnmsubasp %x0,%x1,%x2
1235 xvnmsubmsp %x0,%x1,%x3
1236 xvnmsubasp %x0,%x1,%x2
1237 xvnmsubmsp %x0,%x1,%x3
1238 vnmsubfp %0,%1,%2,%3"
1239 [(set_attr "type" "vecfloat")])
1241 (define_insn "*vsx_nfmsv2df4"
1242 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1245 (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1246 (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1248 (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
1249 "VECTOR_UNIT_VSX_P (V2DFmode)"
1251 xvnmsubadp %x0,%x1,%x2
1252 xvnmsubmdp %x0,%x1,%x3
1253 xvnmsubadp %x0,%x1,%x2
1254 xvnmsubmdp %x0,%x1,%x3"
1255 [(set_attr "type" "vecdouble")])
1257 ;; Vector conditional expressions (no scalar version for these instructions)
1257 ;; Vector conditional expressions (no scalar version for these instructions)

;; Vector compare-equal producing a mask in a VSX register (xvcmpeqsp/dp).
1258 (define_insn "vsx_eq<mode>"
1259 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1260 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1261 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1262 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1263 "xvcmpeq<VSs> %x0,%x1,%x2"
1264 [(set_attr "type" "<VStype_simple>")
1265 (set_attr "fp_type" "<VSfptype_simple>")])

;; Vector compare-greater-than (xvcmpgtsp/dp).
1267 (define_insn "vsx_gt<mode>"
1268 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1269 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1270 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1271 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1272 "xvcmpgt<VSs> %x0,%x1,%x2"
1273 [(set_attr "type" "<VStype_simple>")
1274 (set_attr "fp_type" "<VSfptype_simple>")])

;; Vector compare-greater-or-equal (xvcmpgesp/dp).
1276 (define_insn "*vsx_ge<mode>"
1277 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1278 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1279 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1280 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1281 "xvcmpge<VSs>. %x0,%x1,%x2"
1282 [(set_attr "type" "<VStype_simple>")
1283 (set_attr "fp_type" "<VSfptype_simple>")])

1285 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
1286 ;; indicate a combined status
;; NOTE(review): lines are missing inside the three predicate patterns below
;; (embedded numbering skips, e.g. 1287 -> 1290) -- the CR6/unspec wrapper
;; lines were dropped by extraction; verify against upstream vsx.md.
1287 (define_insn "*vsx_eq_<mode>_p"
1290 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1291 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1293 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1294 (eq:VSX_F (match_dup 1)
1296 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1297 "xvcmpeq<VSs>. %x0,%x1,%x2"
1298 [(set_attr "type" "<VStype_simple>")])

;; Recording form of greater-than compare (xvcmpgt<VSs>. -- dot = set CR6).
1300 (define_insn "*vsx_gt_<mode>_p"
1303 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1304 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1306 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1307 (gt:VSX_F (match_dup 1)
1309 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1310 "xvcmpgt<VSs>. %x0,%x1,%x2"
1311 [(set_attr "type" "<VStype_simple>")])

;; Recording form of greater-or-equal compare (xvcmpge<VSs>.).
1313 (define_insn "*vsx_ge_<mode>_p"
1316 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1317 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1319 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1320 (ge:VSX_F (match_dup 1)
1322 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1323 "xvcmpge<VSs>. %x0,%x1,%x2"
1324 [(set_attr "type" "<VStype_simple>")])
;; Vector select: xxsel picks per-bit between operands 3 and 2 under the mask
;; produced by comparing operand 1 against zero.  Note the operand order in
;; the template (%x3,%x2,%x1) reflects xxsel's tgt,fa,fb,fc encoding.
;; NOTE(review): the if_then_else wrapper line is missing from this extract
;; (numbering jumps 1328 -> 1330); verify against upstream vsx.md.
1327 (define_insn "*vsx_xxsel<mode>"
1328 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1330 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
1331 (match_operand:VSX_L 4 "zero_constant" ""))
1332 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
1333 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
1334 "VECTOR_MEM_VSX_P (<MODE>mode)"
1335 "xxsel %x0,%x3,%x2,%x1"
1336 [(set_attr "type" "vecperm")])

;; Same select pattern but with an unsigned (CCUNS) comparison against zero.
1338 (define_insn "*vsx_xxsel<mode>_uns"
1339 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1341 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
1342 (match_operand:VSX_L 4 "zero_constant" ""))
1343 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
1344 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
1345 "VECTOR_MEM_VSX_P (<MODE>mode)"
1346 "xxsel %x0,%x3,%x2,%x1"
1347 [(set_attr "type" "vecperm")])

;; Copy-sign: result takes magnitude from operand 1 and sign from operand 2
;; (xvcpsgn<VSs> takes the sign source first, hence %x2,%x1).
;; NOTE(review): the unspec wrapper lines are missing from this extract.
1350 (define_insn "vsx_copysign<mode>3"
1351 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1353 [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1354 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
1356 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1357 "xvcpsgn<VSs> %x0,%x2,%x1"
1358 [(set_attr "type" "<VStype_simple>")
1359 (set_attr "fp_type" "<VSfptype_simple>")])
1361 ;; For the conversions, limit the register class for the integer value to be
1362 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
1363 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
1364 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
1365 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
1366 ;; in allowing virtual registers.

;; Signed integer vector -> floating point vector (xvcvsxwsp / xvcvsxddp).
1367 (define_insn "vsx_float<VSi><mode>2"
1368 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
1369 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1370 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1371 "xvcvsx<VSc><VSs> %x0,%x1"
1372 [(set_attr "type" "<VStype_simple>")
1373 (set_attr "fp_type" "<VSfptype_simple>")])

;; Unsigned integer vector -> floating point vector (xvcvux<VSc><VSs>).
1375 (define_insn "vsx_floatuns<VSi><mode>2"
1376 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
1377 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1378 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1379 "xvcvux<VSc><VSs> %x0,%x1"
1380 [(set_attr "type" "<VStype_simple>")
1381 (set_attr "fp_type" "<VSfptype_simple>")])

;; Truncating float vector -> signed integer vector.
1383 (define_insn "vsx_fix_trunc<mode><VSi>2"
1384 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1385 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
1386 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1387 "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
1388 [(set_attr "type" "<VStype_simple>")
1389 (set_attr "fp_type" "<VSfptype_simple>")])

;; Truncating float vector -> unsigned integer vector.
1391 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
1392 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1393 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
1394 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1395 "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
1396 [(set_attr "type" "<VStype_simple>")
1397 (set_attr "fp_type" "<VSfptype_simple>")])
1399 ;; Math rounding functions

;; Round to nearest integer (x<VSv>r<VSs>i -- scalar or vector per VSX_B).
1400 (define_insn "vsx_x<VSv>r<VSs>i"
1401 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1402 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1403 UNSPEC_VSX_ROUND_I))]
1404 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1405 "x<VSv>r<VSs>i %x0,%x1"
1406 [(set_attr "type" "<VStype_simple>")
1407 (set_attr "fp_type" "<VSfptype_simple>")])

;; Round using the current rounding mode (the "ic" form).
1409 (define_insn "vsx_x<VSv>r<VSs>ic"
1410 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1411 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1412 UNSPEC_VSX_ROUND_IC))]
1413 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1414 "x<VSv>r<VSs>ic %x0,%x1"
1415 [(set_attr "type" "<VStype_simple>")
1416 (set_attr "fp_type" "<VSfptype_simple>")])

;; Round toward zero (truncate), expressed as fix on the vector modes
;; (xvr<VSs>iz).
1418 (define_insn "vsx_btrunc<mode>2"
1419 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1420 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1421 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1422 "xvr<VSs>iz %x0,%x1"
1423 [(set_attr "type" "<VStype_simple>")
1424 (set_attr "fp_type" "<VSfptype_simple>")])

;; Unspec form of round-toward-zero covering DF as well (x<VSv>r<VSs>iz).
;; NOTE(review): the unspec-constant line is missing from this extract
;; (numbering jumps 1428 -> 1430).
1426 (define_insn "*vsx_b2trunc<mode>2"
1427 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1428 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1430 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1431 "x<VSv>r<VSs>iz %x0,%x1"
1432 [(set_attr "type" "<VStype_simple>")
1433 (set_attr "fp_type" "<VSfptype_simple>")])

;; Round toward minus infinity (xvr<VSs>im).
1435 (define_insn "vsx_floor<mode>2"
1436 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1437 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1439 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1440 "xvr<VSs>im %x0,%x1"
1441 [(set_attr "type" "<VStype_simple>")
1442 (set_attr "fp_type" "<VSfptype_simple>")])

;; Round toward plus infinity (xvr<VSs>ip).
1444 (define_insn "vsx_ceil<mode>2"
1445 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1446 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1448 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1449 "xvr<VSs>ip %x0,%x1"
1450 [(set_attr "type" "<VStype_simple>")
1451 (set_attr "fp_type" "<VSfptype_simple>")])
1454 ;; VSX convert to/from double vector

1456 ;; Convert between single and double precision
1457 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
1458 ;; scalar single precision instructions internally use the double format.
1459 ;; Prefer the altivec registers, since we likely will need to do a vperm
1460 (define_insn "vsx_<VS_spdp_insn>"
1461 [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
1462 (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
1463 UNSPEC_VSX_CVSPDP))]
1464 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1465 "<VS_spdp_insn> %x0,%x1"
1466 [(set_attr "type" "<VS_spdp_type>")])

1468 ;; xscvspdp, represent the scalar SF type as V4SF
;; NOTE(review): the output-template line is missing from this extract
;; (numbering jumps 1473 -> 1475); same for the next three patterns.
1469 (define_insn "vsx_xscvspdp"
1470 [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
1471 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1472 UNSPEC_VSX_CVSPDP))]
1473 "VECTOR_UNIT_VSX_P (V4SFmode)"
1475 [(set_attr "type" "fp")])

1477 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
1478 ;; format of scalars is actually DF.
1479 (define_insn "vsx_xscvdpsp_scalar"
1480 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1481 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
1482 UNSPEC_VSX_CVSPDP))]
1483 "VECTOR_UNIT_VSX_P (V4SFmode)"
1485 [(set_attr "type" "fp")])

1487 ;; Same as vsx_xscvspdp, but use SF as the type
1488 (define_insn "vsx_xscvspdp_scalar2"
1489 [(set (match_operand:SF 0 "vsx_register_operand" "=f")
1490 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1491 UNSPEC_VSX_CVSPDP))]
1492 "VECTOR_UNIT_VSX_P (V4SFmode)"
1494 [(set_attr "type" "fp")])

1496 ;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
1497 (define_insn "vsx_xscvdpspn"
1498 [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww,?ww")
1499 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")]
1500 UNSPEC_VSX_CVDPSPN))]
1503 [(set_attr "type" "fp")])

;; Non-signalling single -> double conversion (xscvspdpn), ISA 2.07.
1505 (define_insn "vsx_xscvspdpn"
1506 [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?ws")
1507 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wf,wa")]
1508 UNSPEC_VSX_CVSPDPN))]
1511 [(set_attr "type" "fp")])

;; Scalar-SF variant of the non-signalling double -> single conversion.
1513 (define_insn "vsx_xscvdpspn_scalar"
1514 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,?wa")
1515 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww,ww")]
1516 UNSPEC_VSX_CVDPSPN))]
1519 [(set_attr "type" "fp")])

1521 ;; Used by direct move to move a SFmode value from GPR to VSX register
1522 (define_insn "vsx_xscvspdpn_directmove"
1523 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
1524 (unspec:SF [(match_operand:DI 1 "vsx_register_operand" "wa")]
1525 UNSPEC_VSX_CVSPDPN))]
1528 [(set_attr "type" "fp")])
1530 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)

;; vec_ctf (signed): convert V2DI -> V2DF, then divide by 2**scale when the
;; scale operand is non-zero (via rs6000_scale_v2df with a negated scale).
;; NOTE(review): the C-block braces and the "if (scale != 0)" guard lines are
;; missing from this extract (numbering jumps 1541 -> 1543) -- the visible
;; rs6000_scale_v2df call is presumably conditional; verify upstream.
1532 (define_expand "vsx_xvcvsxddp_scale"
1533 [(match_operand:V2DF 0 "vsx_register_operand" "")
1534 (match_operand:V2DI 1 "vsx_register_operand" "")
1535 (match_operand:QI 2 "immediate_operand" "")]
1536 "VECTOR_UNIT_VSX_P (V2DFmode)"
1538 rtx op0 = operands[0];
1539 rtx op1 = operands[1];
1540 int scale = INTVAL(operands[2]);
1541 emit_insn (gen_vsx_xvcvsxddp (op0, op1));
1543 rs6000_scale_v2df (op0, op0, -scale);

;; Plain signed V2DI -> V2DF conversion (xvcvsxddp), used by the expander
;; above.  NOTE(review): output-template line missing from this extract.
1547 (define_insn "vsx_xvcvsxddp"
1548 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1549 (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
1550 UNSPEC_VSX_XVCVSXDDP))]
1551 "VECTOR_UNIT_VSX_P (V2DFmode)"
1553 [(set_attr "type" "vecdouble")])

;; vec_ctf (unsigned): same shape as the signed expander, via xvcvuxddp.
1555 (define_expand "vsx_xvcvuxddp_scale"
1556 [(match_operand:V2DF 0 "vsx_register_operand" "")
1557 (match_operand:V2DI 1 "vsx_register_operand" "")
1558 (match_operand:QI 2 "immediate_operand" "")]
1559 "VECTOR_UNIT_VSX_P (V2DFmode)"
1561 rtx op0 = operands[0];
1562 rtx op1 = operands[1];
1563 int scale = INTVAL(operands[2]);
1564 emit_insn (gen_vsx_xvcvuxddp (op0, op1));
1566 rs6000_scale_v2df (op0, op0, -scale);

;; Plain unsigned V2DI -> V2DF conversion (xvcvuxddp).
1570 (define_insn "vsx_xvcvuxddp"
1571 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1572 (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
1573 UNSPEC_VSX_XVCVUXDDP))]
1574 "VECTOR_UNIT_VSX_P (V2DFmode)"
1576 [(set_attr "type" "vecdouble")])

;; vec_cts: pre-scale the V2DF input by 2**scale into a temporary, then
;; convert to signed V2DI with xvcvdpsxds.
1578 (define_expand "vsx_xvcvdpsxds_scale"
1579 [(match_operand:V2DI 0 "vsx_register_operand" "")
1580 (match_operand:V2DF 1 "vsx_register_operand" "")
1581 (match_operand:QI 2 "immediate_operand" "")]
1582 "VECTOR_UNIT_VSX_P (V2DFmode)"
1584 rtx op0 = operands[0];
1585 rtx op1 = operands[1];
1586 rtx tmp = gen_reg_rtx (V2DFmode);
1587 int scale = INTVAL(operands[2]);
1589 rs6000_scale_v2df (tmp, op1, scale);
1590 emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));

;; Truncating V2DF -> signed V2DI conversion.
1594 (define_insn "vsx_xvcvdpsxds"
1595 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1596 (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
1597 UNSPEC_VSX_XVCVDPSXDS))]
1598 "VECTOR_UNIT_VSX_P (V2DFmode)"
1599 "xvcvdpsxds %x0,%x1"
1600 [(set_attr "type" "vecdouble")])

;; vec_ctu: pre-scale, then convert to unsigned V2DI with xvcvdpuxds.
1602 (define_expand "vsx_xvcvdpuxds_scale"
1603 [(match_operand:V2DI 0 "vsx_register_operand" "")
1604 (match_operand:V2DF 1 "vsx_register_operand" "")
1605 (match_operand:QI 2 "immediate_operand" "")]
1606 "VECTOR_UNIT_VSX_P (V2DFmode)"
1608 rtx op0 = operands[0];
1609 rtx op1 = operands[1];
1610 rtx tmp = gen_reg_rtx (V2DFmode);
1611 int scale = INTVAL(operands[2]);
1613 rs6000_scale_v2df (tmp, op1, scale);
1614 emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));

;; Truncating V2DF -> unsigned V2DI conversion.
1618 (define_insn "vsx_xvcvdpuxds"
1619 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1620 (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
1621 UNSPEC_VSX_XVCVDPUXDS))]
1622 "VECTOR_UNIT_VSX_P (V2DFmode)"
1623 "xvcvdpuxds %x0,%x1"
1624 [(set_attr "type" "vecdouble")])
1626 ;; Convert from 64-bit to 32-bit types
1627 ;; Note, favor the Altivec registers since the usual use of these instructions
1628 ;; is in vector converts and we need to use the Altivec vperm instruction.

;; V2DF -> signed V4SI (xvcvdpsxws).
1630 (define_insn "vsx_xvcvdpsxws"
1631 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1632 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1633 UNSPEC_VSX_CVDPSXWS))]
1634 "VECTOR_UNIT_VSX_P (V2DFmode)"
1635 "xvcvdpsxws %x0,%x1"
1636 [(set_attr "type" "vecdouble")])

;; V2DF -> unsigned V4SI (xvcvdpuxws).
1638 (define_insn "vsx_xvcvdpuxws"
1639 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1640 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1641 UNSPEC_VSX_CVDPUXWS))]
1642 "VECTOR_UNIT_VSX_P (V2DFmode)"
1643 "xvcvdpuxws %x0,%x1"
1644 [(set_attr "type" "vecdouble")])

;; Signed V2DI -> V4SF (xvcvsxdsp).  NOTE(review): the output-template line
;; is missing from this extract (numbering jumps 1650 -> 1652); likewise for
;; the next three patterns.
1646 (define_insn "vsx_xvcvsxdsp"
1647 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
1648 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
1649 UNSPEC_VSX_CVSXDSP))]
1650 "VECTOR_UNIT_VSX_P (V2DFmode)"
1652 [(set_attr "type" "vecfloat")])

;; Unsigned V2DI -> V4SF (xvcvuxdsp).
1654 (define_insn "vsx_xvcvuxdsp"
1655 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
1656 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
1657 UNSPEC_VSX_CVUXDSP))]
1658 "VECTOR_UNIT_VSX_P (V2DFmode)"
1660 [(set_attr "type" "vecdouble")])

1662 ;; Convert from 32-bit to 64-bit types
1663 (define_insn "vsx_xvcvsxwdp"
1664 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1665 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
1666 UNSPEC_VSX_CVSXWDP))]
1667 "VECTOR_UNIT_VSX_P (V2DFmode)"
1669 [(set_attr "type" "vecdouble")])

;; Unsigned V4SI -> V2DF (xvcvuxwdp).
1671 (define_insn "vsx_xvcvuxwdp"
1672 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1673 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
1674 UNSPEC_VSX_CVUXWDP))]
1675 "VECTOR_UNIT_VSX_P (V2DFmode)"
1677 [(set_attr "type" "vecdouble")])

;; V4SF -> signed V2DI (xvcvspsxds).
1679 (define_insn "vsx_xvcvspsxds"
1680 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
1681 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
1682 UNSPEC_VSX_CVSPSXDS))]
1683 "VECTOR_UNIT_VSX_P (V2DFmode)"
1684 "xvcvspsxds %x0,%x1"
1685 [(set_attr "type" "vecdouble")])

;; V4SF -> unsigned V2DI (xvcvspuxds).
1687 (define_insn "vsx_xvcvspuxds"
1688 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
1689 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
1690 UNSPEC_VSX_CVSPUXDS))]
1691 "VECTOR_UNIT_VSX_P (V2DFmode)"
1692 "xvcvspuxds %x0,%x1"
1693 [(set_attr "type" "vecdouble")])
1695 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
1696 ;; the xvrdpiz instruction does not truncate the value if the floating
1697 ;; point value is < LONG_MIN or > LONG_MAX.
;; NOTE(review): the float/fix RTL wrapper and the output template are
;; missing from this extract (numbering jumps 1699 -> 1702, 1705 -> 1707);
;; verify against upstream vsx.md.
1698 (define_insn "*vsx_float_fix_v2df2"
1699 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1702 (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
1703 "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
1704 && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
1705 && !flag_trapping_math && TARGET_FRIZ"
1707 [(set_attr "type" "vecdouble")
1708 (set_attr "fp_type" "fp_addsub_d")])
1711 ;; Permute operations

1713 ;; Build a V2DF/V2DI vector from two scalars
;; On little endian the xxpermdi operands are swapped so that element 0 of
;; the vector (in GCC's element ordering) comes from operand 1.
1714 (define_insn "vsx_concat_<mode>"
1715 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1717 (match_operand:<VS_scalar> 1 "vsx_register_operand" "<VS_64reg>,<VSa>")
1718 (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")))]
1719 "VECTOR_MEM_VSX_P (<MODE>mode)"
1721 if (BYTES_BIG_ENDIAN)
1722 return "xxpermdi %x0,%x1,%x2,0";
1724 return "xxpermdi %x0,%x2,%x1,0";
1726 [(set_attr "type" "vecperm")])

1728 ;; Special purpose concat using xxpermdi to glue two single precision values
1729 ;; together, relying on the fact that internally scalar floats are represented
1730 ;; as doubles. This is used to initialize a V4SF vector with 4 floats
1731 (define_insn "vsx_concat_v2sf"
1732 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1734 [(match_operand:SF 1 "vsx_register_operand" "f,f")
1735 (match_operand:SF 2 "vsx_register_operand" "f,f")]
1736 UNSPEC_VSX_CONCAT))]
1737 "VECTOR_MEM_VSX_P (V2DFmode)"
1739 if (BYTES_BIG_ENDIAN)
1740 return "xxpermdi %x0,%x1,%x2,0";
1742 return "xxpermdi %x0,%x2,%x1,0";
1744 [(set_attr "type" "vecperm")])
1746 ;; xxpermdi for little endian loads and stores. We need several of
1747 ;; these since the form of the PARALLEL differs by mode.
;; Each pattern below recognizes a doubleword swap (the two 64-bit halves
;; exchanged) expressed as a vec_select with a mode-specific PARALLEL, and
;; emits "xxpermdi %x0,%x1,%x1,2".

1748 (define_insn "*vsx_xxpermdi2_le_<mode>"
1749 [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
1751 (match_operand:VSX_LE 1 "vsx_register_operand" "<VSa>")
1752 (parallel [(const_int 1) (const_int 0)])))]
1753 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1754 "xxpermdi %x0,%x1,%x1,2"
1755 [(set_attr "type" "vecperm")])

;; 4 x 32-bit element variant of the doubleword swap.
1757 (define_insn "*vsx_xxpermdi4_le_<mode>"
1758 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
1760 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
1761 (parallel [(const_int 2) (const_int 3)
1762 (const_int 0) (const_int 1)])))]
1763 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1764 "xxpermdi %x0,%x1,%x1,2"
1765 [(set_attr "type" "vecperm")])

;; 8 x 16-bit element variant of the doubleword swap.
1767 (define_insn "*vsx_xxpermdi8_le_V8HI"
1768 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1770 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1771 (parallel [(const_int 4) (const_int 5)
1772 (const_int 6) (const_int 7)
1773 (const_int 0) (const_int 1)
1774 (const_int 2) (const_int 3)])))]
1775 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
1776 "xxpermdi %x0,%x1,%x1,2"
1777 [(set_attr "type" "vecperm")])

;; 16 x 8-bit element variant of the doubleword swap.
1779 (define_insn "*vsx_xxpermdi16_le_V16QI"
1780 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1782 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1783 (parallel [(const_int 8) (const_int 9)
1784 (const_int 10) (const_int 11)
1785 (const_int 12) (const_int 13)
1786 (const_int 14) (const_int 15)
1787 (const_int 0) (const_int 1)
1788 (const_int 2) (const_int 3)
1789 (const_int 4) (const_int 5)
1790 (const_int 6) (const_int 7)])))]
1791 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
1792 "xxpermdi %x0,%x1,%x1,2"
1793 [(set_attr "type" "vecperm")])
1795 ;; lxvd2x for little endian loads. We need several of
1796 ;; these since the form of the PARALLEL differs by mode.
;; Each pattern matches a load combined with a doubleword swap (vec_select
;; PARALLEL) so it can be implemented with a single lxvd2x.  Disabled when
;; TARGET_P9_VECTOR provides true little-endian vector loads.
;; NOTE(review): the output-template lines are missing from this extract
;; (presumably "lxvd2x %x0,%y1" -- verify against upstream vsx.md).

1797 (define_insn "*vsx_lxvd2x2_le_<mode>"
1798 [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
1800 (match_operand:VSX_LE 1 "memory_operand" "Z")
1801 (parallel [(const_int 1) (const_int 0)])))]
1802 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
1804 [(set_attr "type" "vecload")])

1806 (define_insn "*vsx_lxvd2x4_le_<mode>"
1807 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
1809 (match_operand:VSX_W 1 "memory_operand" "Z")
1810 (parallel [(const_int 2) (const_int 3)
1811 (const_int 0) (const_int 1)])))]
1812 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
1814 [(set_attr "type" "vecload")])

1816 (define_insn "*vsx_lxvd2x8_le_V8HI"
1817 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1819 (match_operand:V8HI 1 "memory_operand" "Z")
1820 (parallel [(const_int 4) (const_int 5)
1821 (const_int 6) (const_int 7)
1822 (const_int 0) (const_int 1)
1823 (const_int 2) (const_int 3)])))]
1824 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
1826 [(set_attr "type" "vecload")])

1828 (define_insn "*vsx_lxvd2x16_le_V16QI"
1829 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1831 (match_operand:V16QI 1 "memory_operand" "Z")
1832 (parallel [(const_int 8) (const_int 9)
1833 (const_int 10) (const_int 11)
1834 (const_int 12) (const_int 13)
1835 (const_int 14) (const_int 15)
1836 (const_int 0) (const_int 1)
1837 (const_int 2) (const_int 3)
1838 (const_int 4) (const_int 5)
1839 (const_int 6) (const_int 7)])))]
1840 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
1842 [(set_attr "type" "vecload")])
1844 ;; stxvd2x for little endian stores. We need several of
1845 ;; these since the form of the PARALLEL differs by mode.
;; Mirror of the lxvd2x patterns above: a doubleword-swapped store matched
;; to a single stxvd2x.  Disabled when TARGET_P9_VECTOR is available.
;; NOTE(review): the output-template lines are missing from this extract
;; (presumably "stxvd2x %x1,%y0" -- verify against upstream vsx.md).

1846 (define_insn "*vsx_stxvd2x2_le_<mode>"
1847 [(set (match_operand:VSX_LE 0 "memory_operand" "=Z")
1849 (match_operand:VSX_LE 1 "vsx_register_operand" "<VSa>")
1850 (parallel [(const_int 1) (const_int 0)])))]
1851 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
1853 [(set_attr "type" "vecstore")])

1855 (define_insn "*vsx_stxvd2x4_le_<mode>"
1856 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
1858 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
1859 (parallel [(const_int 2) (const_int 3)
1860 (const_int 0) (const_int 1)])))]
1861 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
1863 [(set_attr "type" "vecstore")])

1865 (define_insn "*vsx_stxvd2x8_le_V8HI"
1866 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1868 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1869 (parallel [(const_int 4) (const_int 5)
1870 (const_int 6) (const_int 7)
1871 (const_int 0) (const_int 1)
1872 (const_int 2) (const_int 3)])))]
1873 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
1875 [(set_attr "type" "vecstore")])

1877 (define_insn "*vsx_stxvd2x16_le_V16QI"
1878 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1880 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1881 (parallel [(const_int 8) (const_int 9)
1882 (const_int 10) (const_int 11)
1883 (const_int 12) (const_int 13)
1884 (const_int 14) (const_int 15)
1885 (const_int 0) (const_int 1)
1886 (const_int 2) (const_int 3)
1887 (const_int 4) (const_int 5)
1888 (const_int 6) (const_int 7)])))]
1889 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
1891 [(set_attr "type" "vecstore")])
1893 ;; Convert a TImode value into V1TImode
;; Element index (operand 3) must be zero for a V1TI vector; the visible code
;; copies operand 1 as the whole vector.  NOTE(review): lines between the
;; guard and the move are missing from this extract (numbering jumps
;; 1901 -> 1904; the non-zero case presumably FAILs) -- verify upstream.
1894 (define_expand "vsx_set_v1ti"
1895 [(match_operand:V1TI 0 "nonimmediate_operand" "")
1896 (match_operand:V1TI 1 "nonimmediate_operand" "")
1897 (match_operand:TI 2 "input_operand" "")
1898 (match_operand:QI 3 "u5bit_cint_operand" "")]
1899 "VECTOR_MEM_VSX_P (V1TImode)"
1901 if (operands[3] != const0_rtx)
1904 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]))

1908 ;; Set one element of a V2DF/V2DI vector, using xxpermdi to merge the new
1908 ;; scalar (operand 2) into the vector (operand 1) at index operand 3.
1909 (define_insn "vsx_set_<mode>"
1910 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?<VSa>")
1912 [(match_operand:VSX_D 1 "vsx_register_operand" "wd,<VSa>")
1913 (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")
1914 (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
1916 "VECTOR_MEM_VSX_P (<MODE>mode)"
1918 int idx_first = BYTES_BIG_ENDIAN ? 0 : 1;
1919 if (INTVAL (operands[3]) == idx_first)
1920 return \"xxpermdi %x0,%x2,%x1,1\";
1921 else if (INTVAL (operands[3]) == 1 - idx_first)
1922 return \"xxpermdi %x0,%x1,%x2,0\";
1926 [(set_attr "type" "vecperm")])

1928 ;; Extract a DF/DI element from V2DF/V2DI
1929 (define_expand "vsx_extract_<mode>"
1930 [(set (match_operand:<VS_scalar> 0 "register_operand" "")
1931 (vec_select:<VS_scalar> (match_operand:VSX_D 1 "register_operand" "")
1933 [(match_operand:QI 2 "u5bit_cint_operand" "")])))]
1934 "VECTOR_MEM_VSX_P (<MODE>mode)"
1937 ;; Optimize cases where we can do a simple or direct move.
1938 ;; Or see if we can avoid doing the move at all
;; Element extract when the element already sits in the scalar position:
;; a no-op when source and destination registers coincide, otherwise a
;; direct move to a GPR or an xxlor register copy.
;; NOTE(review): several C-statement lines are missing from this extract
;; (numbering jumps 1948 -> 1950, 1954 -> 1958) -- verify upstream.
1939 (define_insn "*vsx_extract_<mode>_internal1"
1940 [(set (match_operand:<VS_scalar> 0 "register_operand" "=d,<VS_64reg>,r,r")
1941 (vec_select:<VS_scalar>
1942 (match_operand:VSX_D 1 "register_operand" "d,<VS_64reg>,<VS_64dm>,<VS_64dm>")
1944 [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD,wL")])))]
1945 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
1947 int op0_regno = REGNO (operands[0]);
1948 int op1_regno = REGNO (operands[1]);
1950 if (op0_regno == op1_regno)
1953 if (INT_REGNO_P (op0_regno))
1954 return ((INTVAL (operands[2]) == VECTOR_ELEMENT_MFVSRLD_64BIT)
1958 if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
1961 return "xxlor %x0,%x1,%x1";
1963 [(set_attr "type" "fp,vecsimple,mftgpr,mftgpr")
1964 (set_attr "length" "4")])

;; General element extract within VSX registers: either a register copy
;; (element already in place) or an xxpermdi selecting the wanted doubleword
;; (index flipped for little endian).
1966 (define_insn "*vsx_extract_<mode>_internal2"
1967 [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=d,<VS_64reg>,<VS_64reg>")
1968 (vec_select:<VS_scalar>
1969 (match_operand:VSX_D 1 "vsx_register_operand" "d,wd,wd")
1970 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "wD,wD,i")])))]
1971 "VECTOR_MEM_VSX_P (<MODE>mode)
1972 && (!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE
1973 || INTVAL (operands[2]) != VECTOR_ELEMENT_SCALAR_64BIT)"
1976 gcc_assert (UINTVAL (operands[2]) <= 1);
1978 if (INTVAL (operands[2]) == VECTOR_ELEMENT_SCALAR_64BIT)
1980 int op0_regno = REGNO (operands[0]);
1981 int op1_regno = REGNO (operands[1]);
1983 if (op0_regno == op1_regno)
1986 if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
1989 return "xxlor %x0,%x1,%x1";
1992 fldDM = INTVAL (operands[2]) << 1;
1993 if (!BYTES_BIG_ENDIAN)
1995 operands[3] = GEN_INT (fldDM);
1996 return "xxpermdi %x0,%x1,%x1,%3";
1998 [(set_attr "type" "fp,vecsimple,vecperm")
1999 (set_attr "length" "4")])

2001 ;; Optimize extracting a single scalar element from memory if the scalar is in
2002 ;; the correct location to use a single load.
;; NOTE(review): the output templates are missing from this extract.
2003 (define_insn "*vsx_extract_<mode>_load"
2004 [(set (match_operand:<VS_scalar> 0 "register_operand" "=d,wv,wr")
2005 (vec_select:<VS_scalar>
2006 (match_operand:VSX_D 1 "memory_operand" "m,Z,m")
2007 (parallel [(const_int 0)])))]
2008 "VECTOR_MEM_VSX_P (<MODE>mode)"
2013 [(set_attr "type" "fpload,fpload,load")
2014 (set_attr "length" "4")])

2016 ;; Optimize storing a single scalar element that is the right location to
;; use a single store (trailing comment line missing from this extract).
2018 (define_insn "*vsx_extract_<mode>_store"
2019 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,?Z")
2020 (vec_select:<VS_scalar>
2021 (match_operand:VSX_D 1 "register_operand" "d,wd,<VSa>")
2022 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
2023 "VECTOR_MEM_VSX_P (<MODE>mode)"
2028 [(set_attr "type" "fpstore")
2029 (set_attr "length" "4")])
2031 ;; Extract a SF element from V4SF
;; For a non-zero element, rotate the wanted word into position with
;; xxsldwi into the scratch (or a fresh register), then convert that lane
;; to scalar SF with xscvspdp_scalar2.  Little endian flips the element
;; index (3 - n).  NOTE(review): the split pattern and part of the C body
;; are missing from this extract -- verify against upstream vsx.md.
2032 (define_insn_and_split "vsx_extract_v4sf"
2033 [(set (match_operand:SF 0 "vsx_register_operand" "=f,f")
2035 (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2036 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "O,i")])))
2037 (clobber (match_scratch:V4SF 3 "=X,0"))]
2038 "VECTOR_UNIT_VSX_P (V4SFmode)"
2046 rtx op0 = operands[0];
2047 rtx op1 = operands[1];
2048 rtx op2 = operands[2];
2049 rtx op3 = operands[3];
2051 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
2057 if (GET_CODE (op3) == SCRATCH)
2058 op3 = gen_reg_rtx (V4SFmode);
2059 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
2062 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
2065 [(set_attr "length" "4,8")
2066 (set_attr "type" "fp")])
2068 ;; Expand the builtin form of xxpermdi to canonical rtl.
;; Decodes the 2-bit xxpermdi mask into two selector constants and emits
;; vsx_xxpermdi2_{v2df,v2di}_1; non-V2DF/V2DI modes are funnelled through
;; V2DI lowparts.  For little endian, the operands and selectors are
;; pre-swapped to cancel the LE transformation done by the _1 insn below.
2069 (define_expand "vsx_xxpermdi_<mode>"
2070 [(match_operand:VSX_L 0 "vsx_register_operand" "")
2071 (match_operand:VSX_L 1 "vsx_register_operand" "")
2072 (match_operand:VSX_L 2 "vsx_register_operand" "")
2073 (match_operand:QI 3 "u5bit_cint_operand" "")]
2074 "VECTOR_MEM_VSX_P (<MODE>mode)"
2076 rtx target = operands[0];
2077 rtx op0 = operands[1];
2078 rtx op1 = operands[2];
2079 int mask = INTVAL (operands[3]);
2080 rtx perm0 = GEN_INT ((mask >> 1) & 1);
2081 rtx perm1 = GEN_INT ((mask & 1) + 2);
2082 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
2084 if (<MODE>mode == V2DFmode)
2085 gen = gen_vsx_xxpermdi2_v2df_1;
2088 gen = gen_vsx_xxpermdi2_v2di_1;
2089 if (<MODE>mode != V2DImode)
2091 target = gen_lowpart (V2DImode, target);
2092 op0 = gen_lowpart (V2DImode, op0);
2093 op1 = gen_lowpart (V2DImode, op1);
2096 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
2097 transformation we don't want; it is necessary for
2098 rs6000_expand_vec_perm_const_1 but not for this use. So we
2099 prepare for that by reversing the transformation here. */
2100 if (BYTES_BIG_ENDIAN)
2101 emit_insn (gen (target, op0, op1, perm0, perm1));
2104 rtx p0 = GEN_INT (3 - INTVAL (perm1));
2105 rtx p1 = GEN_INT (3 - INTVAL (perm0));
2106 emit_insn (gen (target, op1, op0, p0, p1));

;; Canonical xxpermdi: selects one doubleword from each input per the two
;; selector constants (operand 3 in 0..1, operand 4 in 2..3), re-encoding
;; them into the instruction's 2-bit mask; operands and selectors are
;; swapped/inverted for little endian (see the in-body comment).
2111 (define_insn "vsx_xxpermdi2_<mode>_1"
2112 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
2114 (vec_concat:<VS_double>
2115 (match_operand:VSX_D 1 "vsx_register_operand" "wd")
2116 (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
2117 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2118 (match_operand 4 "const_2_to_3_operand" "")])))]
2119 "VECTOR_MEM_VSX_P (<MODE>mode)"
2123 /* For little endian, swap operands and invert/swap selectors
2124 to get the correct xxpermdi. The operand swap sets up the
2125 inputs as a little endian array. The selectors are swapped
2126 because they are defined to use big endian ordering. The
2127 selectors are inverted to get the correct doublewords for
2128 little endian ordering. */
2129 if (BYTES_BIG_ENDIAN)
2131 op3 = INTVAL (operands[3]);
2132 op4 = INTVAL (operands[4]);
2136 op3 = 3 - INTVAL (operands[4]);
2137 op4 = 3 - INTVAL (operands[3]);
2140 mask = (op3 << 1) | (op4 - 2);
2141 operands[3] = GEN_INT (mask);
2143 if (BYTES_BIG_ENDIAN)
2144 return "xxpermdi %x0,%x1,%x2,%3";
2146 return "xxpermdi %x0,%x2,%x1,%3";
2148 [(set_attr "type" "vecperm")])
;; Constant vector permute for the 64-bit vector modes; the work is done
;; by rs6000_expand_vec_perm_const.  NOTE(review): the tail of this
;; expander (the DONE/FAIL arms after the call) is elided in this excerpt.
2150 (define_expand "vec_perm_const<mode>"
2151 [(match_operand:VSX_D 0 "vsx_register_operand" "")
2152 (match_operand:VSX_D 1 "vsx_register_operand" "")
2153 (match_operand:VSX_D 2 "vsx_register_operand" "")
2154 (match_operand:V2DI 3 "" "")]
2155 "VECTOR_MEM_VSX_P (<MODE>mode)"
2157 if (rs6000_expand_vec_perm_const (operands))
2163 ;; Expanders for builtins
;; vsx_mergel: emit a vec_select of the concatenation of operands 1 and 2
;; choosing elements {1,3} (big-endian order), or elements {0,2} of the
;; swapped concatenation for LE with -maltivec=be, so the RTL always
;; describes the same merge-low result.
2164 (define_expand "vsx_mergel_<mode>"
2165 [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
2166 (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
2167 (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
2168 "VECTOR_MEM_VSX_P (<MODE>mode)"
2173 /* Special handling for LE with -maltivec=be.  */
2174 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
2176 v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
2177 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
2181 v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
2182 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
2185 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
2186 emit_insn (gen_rtx_SET (operands[0], x));
;; vsx_mergeh: merge-high counterpart of vsx_mergel — selects elements
;; {0,2} of the concatenation (big-endian order), or {1,3} of the swapped
;; concatenation for LE with -maltivec=be.
2190 (define_expand "vsx_mergeh_<mode>"
2191 [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
2192 (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
2193 (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
2194 "VECTOR_MEM_VSX_P (<MODE>mode)"
2199 /* Special handling for LE with -maltivec=be.  */
2200 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
2202 v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
2203 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
2207 v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
2208 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
2211 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
2212 emit_insn (gen_rtx_SET (operands[0], x));
;; Splat a 64-bit scalar into both doublewords of the vector.  Register
;; alternatives use xxpermdi with selector 0 (duplicate input doubleword).
;; NOTE(review): the templates for the memory (Z constraint) alternatives
;; are elided in this excerpt; the type attribute marks them vecload
;; (load-and-splat), presumably lxvdsx — verify against the full file.
2217 (define_insn "vsx_splat_<mode>"
2218 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,wd,wd,?<VSa>,?<VSa>,?<VSa>")
2219 (vec_duplicate:VSX_D
2220 (match_operand:<VS_scalar> 1 "splat_input_operand" "<VS_64reg>,f,Z,<VSa>,<VSa>,Z")))]
2221 "VECTOR_MEM_VSX_P (<MODE>mode)"
2223 xxpermdi %x0,%x1,%x1,0
2224 xxpermdi %x0,%x1,%x1,0
2226 xxpermdi %x0,%x1,%x1,0
2227 xxpermdi %x0,%x1,%x1,0
2229 [(set_attr "type" "vecperm,vecperm,vecload,vecperm,vecperm,vecload")])
;; Splat word ELT of operand 1 into all four words of the result.
;; Operand 2 uses big-endian word numbering; for little endian it is
;; converted to the hardware element number as (3 - ELT) before output.
2232 (define_insn "vsx_xxspltw_<mode>"
2233 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2234 (vec_duplicate:VSX_W
2235 (vec_select:<VS_scalar>
2236 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2238 [(match_operand:QI 2 "u5bit_cint_operand" "i,i")]))))]
2239 "VECTOR_MEM_VSX_P (<MODE>mode)"
2241 if (!BYTES_BIG_ENDIAN)
2242 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
2244 return "xxspltw %x0,%x1,%2";
2246 [(set_attr "type" "vecperm")])
;; Unspec form of xxspltw that emits operand 2 verbatim, with no endian
;; adjustment (contrast vsx_xxspltw_<mode> above, which remaps the
;; element number for little endian) — for callers that have already
;; computed the hardware element number.
2248 (define_insn "vsx_xxspltw_<mode>_direct"
2249 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2250 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2251 (match_operand:QI 2 "u5bit_cint_operand" "i,i")]
2252 UNSPEC_VSX_XXSPLTW))]
2253 "VECTOR_MEM_VSX_P (<MODE>mode)"
2254 "xxspltw %x0,%x1,%2"
2255 [(set_attr "type" "vecperm")])
2257 ;; V2DF/V2DI splat for use by vec_splat builtin
;; Emits xxpermdi selector 0 (duplicate first input doubleword) when
;; operand 2 names element 0 under big element ordering, or element 1
;; under little-endian element ordering; otherwise selector 3
;; (duplicate the other doubleword).
2258 (define_insn "vsx_xxspltd_<mode>"
2259 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2260 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
2261 (match_operand:QI 2 "u5bit_cint_operand" "i")]
2262 UNSPEC_VSX_XXSPLTD))]
2263 "VECTOR_MEM_VSX_P (<MODE>mode)"
2265 if ((VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 0)
2266 || (!VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 1))
2267 return "xxpermdi %x0,%x1,%x1,0";
2269 return "xxpermdi %x0,%x1,%x1,3";
2271 [(set_attr "type" "vecperm")])
2273 ;; V4SF/V4SI interleave
;; Merge-high words: interleave words {0,1} of operand 1 with words {0,1}
;; of operand 2 (big-endian numbering).  For little endian the same
;; result is produced by the mirrored instruction xxmrglw with the
;; inputs swapped.
2274 (define_insn "vsx_xxmrghw_<mode>"
2275 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2277 (vec_concat:<VS_double>
2278 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2279 (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
2280 (parallel [(const_int 0) (const_int 4)
2281 (const_int 1) (const_int 5)])))]
2282 "VECTOR_MEM_VSX_P (<MODE>mode)"
2284 if (BYTES_BIG_ENDIAN)
2285 return "xxmrghw %x0,%x1,%x2";
2287 return "xxmrglw %x0,%x2,%x1";
2289 [(set_attr "type" "vecperm")])
;; Merge-low words: interleave words {2,3} of operand 1 with words {2,3}
;; of operand 2 (big-endian numbering); mirrored to xxmrghw with swapped
;; inputs for little endian.
;; NOTE(review): operand 2's second alternative is "?<VSa>" while
;; vsx_xxmrghw uses plain "<VSa>" — inconsistent but harmless; confirm
;; which form is intended before unifying.
2291 (define_insn "vsx_xxmrglw_<mode>"
2292 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2294 (vec_concat:<VS_double>
2295 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2296 (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
2297 (parallel [(const_int 2) (const_int 6)
2298 (const_int 3) (const_int 7)])))]
2299 "VECTOR_MEM_VSX_P (<MODE>mode)"
2301 if (BYTES_BIG_ENDIAN)
2302 return "xxmrglw %x0,%x1,%x2";
2304 return "xxmrghw %x0,%x2,%x1";
2306 [(set_attr "type" "vecperm")])
2308 ;; Shift left double by word immediate
;; Operand 3 is the word shift count applied to the operand1:operand2
;; double-width value.  NOTE(review): the unspec name line is elided in
;; this excerpt.
2309 (define_insn "vsx_xxsldwi_<mode>"
2310 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
2311 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
2312 (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
2313 (match_operand:QI 3 "u5bit_cint_operand" "i")]
2315 "VECTOR_MEM_VSX_P (<MODE>mode)"
2316 "xxsldwi %x0,%x1,%x2,%3"
2317 [(set_attr "type" "vecperm")])
2320 ;; Vector reduction insns and splitters
;; V2DF reduction: split into tmp = xxsldwi (v, v, 2) — which swaps the
;; two doublewords — followed by operand0 = <VEC_reduc_rtx> (tmp, v),
;; combining the two elements in each lane.  The scratch reuses
;; operand 0 in the first two alternatives ("=0,0") and a fresh pseudo
;; is used when it is still a SCRATCH.
2322 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
2323 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
2327 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
2328 (parallel [(const_int 1)]))
2331 (parallel [(const_int 0)])))
2333 (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
2334 "VECTOR_UNIT_VSX_P (V2DFmode)"
2340 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
2341 ? gen_reg_rtx (V2DFmode)
2343 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
2344 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
2347 [(set_attr "length" "8")
2348 (set_attr "type" "veccomplex")])
;; V4SF reduction: two shift-and-combine steps — rotate by 2 words and
;; apply the reduction op, then rotate the partial result by 3 words and
;; combine again.  Fresh pseudos replace the scratches before register
;; allocation (can_create_pseudo_p).
2350 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
2351 [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
2353 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
2354 (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
2355 (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
2356 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
2357 "VECTOR_UNIT_VSX_P (V4SFmode)"
2363 rtx op0 = operands[0];
2364 rtx op1 = operands[1];
2365 rtx tmp2, tmp3, tmp4;
2367 if (can_create_pseudo_p ())
2369 tmp2 = gen_reg_rtx (V4SFmode);
2370 tmp3 = gen_reg_rtx (V4SFmode);
2371 tmp4 = gen_reg_rtx (V4SFmode);
2380 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
2381 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
2382 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
2383 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
2386 [(set_attr "length" "16")
2387 (set_attr "type" "veccomplex")])
2389 ;; Combiner patterns with the vector reduction patterns that know we can get
2390 ;; to the top element of the V2DF array without doing an extract.
;; Scalar-result V2DF reduction: element 0 is reached for free with
;; gen_highpart, element 1 is extracted into the DF scratch, then a
;; scalar <VEC_reduc_rtx> DF op combines them into operand 0.
2392 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
2393 [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
2398 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
2399 (parallel [(const_int 1)]))
2402 (parallel [(const_int 0)])))
2404 (parallel [(const_int 1)])))
2405 (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
2406 "VECTOR_UNIT_VSX_P (V2DFmode)"
2412 rtx hi = gen_highpart (DFmode, operands[1]);
2413 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
2414 ? gen_reg_rtx (DFmode)
2417 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
2418 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
2421 [(set_attr "length" "8")
2422 (set_attr "type" "veccomplex")])
;; Scalar-result V4SF reduction: same shift-and-combine sequence as
;; vsx_reduc_*_v4sf (rotate 2, combine; rotate 3, combine), then the
;; final vector element is converted to a scalar SF in operand 0 with
;; xscvspdp_scalar2.
2424 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
2425 [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
2428 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
2429 (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
2430 (parallel [(const_int 3)])))
2431 (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
2432 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
2433 (clobber (match_scratch:V4SF 4 "=0,0"))]
2434 "VECTOR_UNIT_VSX_P (V4SFmode)"
2440 rtx op0 = operands[0];
2441 rtx op1 = operands[1];
2442 rtx tmp2, tmp3, tmp4, tmp5;
2444 if (can_create_pseudo_p ())
2446 tmp2 = gen_reg_rtx (V4SFmode);
2447 tmp3 = gen_reg_rtx (V4SFmode);
2448 tmp4 = gen_reg_rtx (V4SFmode);
2449 tmp5 = gen_reg_rtx (V4SFmode);
2459 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
2460 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
2461 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
2462 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
2463 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
2466 [(set_attr "length" "20")
2467 (set_attr "type" "veccomplex")])
2470 ;; Power8 Vector fusion.  The fused ops must be physically adjacent.
;; Fuses "li reg,imm" with an adjacent indexed vector load into the
;; Power8 fusion pair li + lx<VSm>x (base register set by the li, index
;; in operand 3).  Disabled once Power9 vector insns are available.
;; NOTE(review): the define_peephole2 header line is elided in this
;; excerpt.
2472 [(set (match_operand:P 0 "base_reg_operand" "")
2473 (match_operand:P 1 "short_cint_operand" ""))
2474 (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
2475 (mem:VSX_M2 (plus:P (match_dup 0)
2476 (match_operand:P 3 "int_reg_operand" ""))))]
2477 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
2478 "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
2479 [(set_attr "length" "8")
2480 (set_attr "type" "vecload")])
;; Same li + indexed-vector-load fusion as above, matching the commuted
;; form of the address (register operand first in the plus).
;; NOTE(review): the define_peephole2 header line and the second plus
;; operand line are elided in this excerpt.
2483 [(set (match_operand:P 0 "base_reg_operand" "")
2484 (match_operand:P 1 "short_cint_operand" ""))
2485 (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
2486 (mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "")
2488 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
2489 "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
2490 [(set_attr "length" "8")
2491 (set_attr "type" "vecload")])