;; Copyright (C) 2009-2018 Free Software Foundation, Inc.
;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Iterator for comparison types
(define_code_iterator CMP_TEST [eq lt gt unordered])

;; Mode attribute for vector floate and floato conversions
(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])

;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])

;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])

;; Mode iterator to handle swapping words on little endian for the 128-bit
;; types that go in a single vector register.
(define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
                                  (TF "FLOAT128_VECTOR_P (TFmode)")
                                  TI
                                  V1TI])

;; Iterator for 128-bit integer types that go in a single vector register.
(define_mode_iterator VSX_TI [TI V1TI])

;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])

;; Iterator for the DF types
(define_mode_iterator VSX_DF [V2DF DF])

;; Iterator for vector floating point types supported by VSX
(define_mode_iterator VSX_F [V4SF V2DF])

;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")])

;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")
                             TI])

(define_mode_attr VSX_XXBR [(V8HI "h")

;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm [(V16QI "vw4")

;; Map into the appropriate suffix based on the type
(define_mode_attr VSs [(V16QI "sp")

;; Map the register class used
(define_mode_attr VSr [(V16QI "v")

;; Map the register class used for float<->int conversions (floating point side)
;; VSr2 is the preferred register class, VSr3 is any register class that will
(define_mode_attr VSr2 [(V2DF "wd")

(define_mode_attr VSr3 [(V2DF "wa")

;; Map the register class for sp<->dp float conversions, destination
(define_mode_attr VSr4 [(SF "ws")

;; Map the register class for sp<->dp float conversions, source
(define_mode_attr VSr5 [(SF "ws")

;; The VSX register class that a type can occupy, even if it is not the
;; preferred register class (VSr is the preferred register class that will get
(define_mode_attr VSa [(V16QI "wa")

;; A mode attribute to disparage use of GPR registers, except for scalar
;; integer modes.
(define_mode_attr ??r [(V16QI "??r")

;; Same size integer type for floating point data
(define_mode_attr VSi [(V4SF "v4si")

(define_mode_attr VSI [(V4SF "V4SI")

;; Word size for same size conversion
(define_mode_attr VSc [(V4SF "w")

;; Map into either s or v, depending on whether this is a scalar or vector
(define_mode_attr VSv [(V16QI "v")

;; Appropriate type for add ops (and other simple FP ops)
(define_mode_attr VStype_simple [(V2DF "vecdouble")

;; Appropriate type for multiply ops
(define_mode_attr VStype_mul [(V2DF "vecdouble")

;; Appropriate type for divide ops.
(define_mode_attr VStype_div [(V2DF "vecdiv")

;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
(define_mode_attr VStype_sqrt [(V2DF "dsqrt")

;; Iterator and modes for sp<->dp conversions
;; Because scalar SF values are represented internally as double, use the
;; V4SF type to represent this rather than SF.
(define_mode_iterator VSX_SPDP [DF V4SF V2DF])

(define_mode_attr VS_spdp_res [(DF "V4SF")

(define_mode_attr VS_spdp_insn [(DF "xscvdpsp")

(define_mode_attr VS_spdp_type [(DF "fp")

;; Map the scalar mode for a vector type
(define_mode_attr VS_scalar [(V1TI "TI")

;; Map to a double-sized vector mode
(define_mode_attr VS_double [(V4SI "V8SI")

;; Map register class for 64-bit element in 128-bit vector for direct moves
(define_mode_attr VS_64dm [(V2DF "wk")

;; Map register class for 64-bit element in 128-bit vector for normal register
;; to register moves
(define_mode_attr VS_64reg [(V2DF "ws")

;; Iterators for loading constants with xxspltib
(define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])

;; Vector reverse byte modes
(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])

;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
;; done on ISA 2.07 and not just ISA 3.0.
(define_mode_iterator VSX_EXTRACT_I  [V16QI V8HI V4SI])
(define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
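
;; Added note (hedged, not from the original sources): V4SI is left out of
;; VSX_EXTRACT_I2 because a word extract can already be done on ISA 2.07 with
;; a direct move; only the char/short cases need the ISA 3.0 vextu*[lr]x
;; forms.  Illustrative C (function names are hypothetical):
;;
;;   int   get_w (vector int v)   { return vec_extract (v, 1); }
;;   short get_h (vector short v) { return vec_extract (v, 3); }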

(define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
                                     (V8HI "h")
                                     (V4SI "w")])

;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
;; insert to validate the operand number.
(define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
                                         (V8HI "const_0_to_7_operand")
                                         (V4SI "const_0_to_3_operand")])

;; Mode attribute to give the constraint for vector extract and insert
;; operations.
(define_mode_attr VSX_EX [(V16QI "v")

;; Mode iterator for binary floating types other than double to
;; optimize convert to that floating point type from an extract
;; of an integer type
(define_mode_iterator VSX_EXTRACT_FL [SF
                                      (IF "FLOAT128_2REG_P (IFmode)")
                                      (KF "TARGET_FLOAT128_HW")
                                      (TF "FLOAT128_2REG_P (TFmode)
                                           || (FLOAT128_IEEE_P (TFmode)
                                               && TARGET_FLOAT128_HW)")])

;; Mode iterator for binary floating types that have a direct conversion
;; from 64-bit integer to floating point
(define_mode_iterator FL_CONV [SF
                               (KF "TARGET_FLOAT128_HW")
                               (TF "TARGET_FLOAT128_HW
                                    && FLOAT128_IEEE_P (TFmode)")])

;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])

;; Mode attribute to give the count for the splat instruction to splat
;; the value in the 64-bit integer slot
(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])

;; Mode attribute to give the suffix for the splat instruction
(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])

;; Constants for creating unspecs
(define_c_enum "unspec"
   UNSPEC_VSX_UNS_FLOAT2
   UNSPEC_VSX_UNS_FLOATE
   UNSPEC_VSX_UNS_FLOATO
   UNSPEC_VSX_XVCVDPSXDS
   UNSPEC_VSX_XVCVDPUXDS
   UNSPEC_VSX_SIGN_EXTEND
   UNSPEC_VSX_XVCVSPSXWS
   UNSPEC_VSX_XVCVSPSXDS
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
   UNSPEC_VSX_FIRST_MATCH_INDEX
   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX

;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE> (match_dup 1)
                           (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE> (match_dup 2)
                           (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
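
;; Hedged illustration (added commentary, not from the original file): on
;; little-endian Power8 (-mcpu=power8, no ISA 3.0), a plain vector load such
;; as the hypothetical C below
;;
;;   vector double ld (const vector double *p) { return *p; }
;;
;; is expected to come out of this pattern as an lxvd2x followed by a
;; doubleword swap, roughly "lxvd2x vs0,0,r3; xxpermdi vs34,vs0,vs0,2"
;; (register numbers illustrative), unless the swap-elimination pass or the
;; lvx rewrite above removes the permute.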

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE> (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE> (match_dup 2)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V8HI (match_dup 1)
                         (parallel [(const_int 4) (const_int 5)
                                    (const_int 6) (const_int 7)
                                    (const_int 0) (const_int 1)
                                    (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI (match_dup 2)
                         (parallel [(const_int 4) (const_int 5)
                                    (const_int 6) (const_int 7)
                                    (const_int 0) (const_int 1)
                                    (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V16QI (match_dup 1)
                          (parallel [(const_int 8) (const_int 9)
                                     (const_int 10) (const_int 11)
                                     (const_int 12) (const_int 13)
                                     (const_int 14) (const_int 15)
                                     (const_int 0) (const_int 1)
                                     (const_int 2) (const_int 3)
                                     (const_int 4) (const_int 5)
                                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI (match_dup 2)
                          (parallel [(const_int 8) (const_int 9)
                                     (const_int 10) (const_int 11)
                                     (const_int 12) (const_int 13)
                                     (const_int 14) (const_int 15)
                                     (const_int 0) (const_int 1)
                                     (const_int 2) (const_int 3)
                                     (const_int 4) (const_int 5)
                                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_D 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])
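
;; Hedged illustration (added commentary): the matching store, e.g. the
;; hypothetical C
;;
;;   void st (vector double *p, vector double v) { *p = v; }
;;
;; is expected to split into a doubleword swap into a scratch register
;; followed by stxvd2x, roughly "xxpermdi scratch,vs,vs,2; stxvd2x
;; scratch,0,r3"; the post-reload split below instead re-permutes the source
;; register in place, since no scratch is available after register
;; allocation.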

(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE> (match_dup 1)
                           (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE> (match_dup 2)
                           (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE> (match_dup 1)
                           (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE> (match_dup 1)
                           (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 1)
        (vec_select:<MODE> (match_dup 1)
                           (parallel [(const_int 1) (const_int 0)])))]
  "")

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE> (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE> (match_dup 2)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE> (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE> (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))
   (set (match_dup 1)
        (vec_select:<MODE> (match_dup 1)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))]
  "")

(define_insn "*vsx_le_perm_store_v8hi"
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V8HI (match_dup 1)
                         (parallel [(const_int 4) (const_int 5)
                                    (const_int 6) (const_int 7)
                                    (const_int 0) (const_int 1)
                                    (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI (match_dup 2)
                         (parallel [(const_int 4) (const_int 5)
                                    (const_int 6) (const_int 7)
                                    (const_int 0) (const_int 1)
                                    (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V8HI (match_dup 1)
                         (parallel [(const_int 4) (const_int 5)
                                    (const_int 6) (const_int 7)
                                    (const_int 0) (const_int 1)
                                    (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI (match_dup 1)
                         (parallel [(const_int 4) (const_int 5)
                                    (const_int 6) (const_int 7)
                                    (const_int 0) (const_int 1)
                                    (const_int 2) (const_int 3)])))
   (set (match_dup 1)
        (vec_select:V8HI (match_dup 1)
                         (parallel [(const_int 4) (const_int 5)
                                    (const_int 6) (const_int 7)
                                    (const_int 0) (const_int 1)
                                    (const_int 2) (const_int 3)])))]
  "")

(define_insn "*vsx_le_perm_store_v16qi"
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V16QI (match_dup 1)
                          (parallel [(const_int 8) (const_int 9)
                                     (const_int 10) (const_int 11)
                                     (const_int 12) (const_int 13)
                                     (const_int 14) (const_int 15)
                                     (const_int 0) (const_int 1)
                                     (const_int 2) (const_int 3)
                                     (const_int 4) (const_int 5)
                                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI (match_dup 2)
                          (parallel [(const_int 8) (const_int 9)
                                     (const_int 10) (const_int 11)
                                     (const_int 12) (const_int 13)
                                     (const_int 14) (const_int 15)
                                     (const_int 0) (const_int 1)
                                     (const_int 2) (const_int 3)
                                     (const_int 4) (const_int 5)
                                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V16QI (match_dup 1)
                          (parallel [(const_int 8) (const_int 9)
                                     (const_int 10) (const_int 11)
                                     (const_int 12) (const_int 13)
                                     (const_int 14) (const_int 15)
                                     (const_int 0) (const_int 1)
                                     (const_int 2) (const_int 3)
                                     (const_int 4) (const_int 5)
                                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI (match_dup 1)
                          (parallel [(const_int 8) (const_int 9)
                                     (const_int 10) (const_int 11)
                                     (const_int 12) (const_int 13)
                                     (const_int 14) (const_int 15)
                                     (const_int 0) (const_int 1)
                                     (const_int 2) (const_int 3)
                                     (const_int 4) (const_int 5)
                                     (const_int 6) (const_int 7)])))
   (set (match_dup 1)
        (vec_select:V16QI (match_dup 1)
                          (parallel [(const_int 8) (const_int 9)
                                     (const_int 10) (const_int 11)
                                     (const_int 12) (const_int 13)
                                     (const_int 14) (const_int 15)
                                     (const_int 0) (const_int 1)
                                     (const_int 2) (const_int 3)
                                     (const_int 4) (const_int 5)
                                     (const_int 6) (const_int 7)])))]
  "")

;; Little endian word swapping for 128-bit types that are either scalars or
;; the special V1TI container class, for which it is not appropriate to use
;; vec_select.
(define_insn "*vsx_le_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z,&r,&r,Q")
        (rotate:VSX_TI
         (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>,r,Q,r")
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   xxpermdi %x0,%x1,%x1,2
   lxvd2x %x0,%y1
   stxvd2x %x1,%y0
   mr %0,%L1\;mr %L0,%1
   ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
   std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
  [(set_attr "length" "4,4,4,8,8,8")
   (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])

(define_insn_and_split "*vsx_le_undo_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
        (rotate:VSX_TI
         (rotate:VSX_TI
          (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
          (const_int 64))
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "@
   #
   xxlor %x0,%x1"
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
    {
      emit_note (NOTE_INSN_DELETED);
      DONE;
    }
}
  [(set_attr "length" "0,4")
   (set_attr "type" "veclogical")])

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,r")
        (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   #
   #"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
}
  [(set_attr "type" "vecload,load")
   (set_attr "length" "8,8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>,r"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   #
   #"
  [(set_attr "type" "vecstore,store")
   (set_attr "length" "12,8")])

(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
})

;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "int_reg_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "memory_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && peep2_reg_dead_p (2, operands[0])"
  [(set (match_dup 2) (match_dup 1))])
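
;; Hedged example (added commentary): when an __int128 only moves through
;; GPRs, e.g. the hypothetical C
;;
;;   __int128 copy (__int128 *p) { return *p; }
;;
;; the access is first expressed as a pair of rotate-by-64 insns matching
;; *vsx_le_permute_<mode> above; once the intermediate register is dead, the
;; peepholes collapse the two rotates back into a plain move, since swapping
;; the two doublewords twice is a no-op.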

;; Peephole to catch memory to memory transfers for TImode if TImode landed in
;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
;; floating point are handled by the more generic swap elimination pass.
(define_peephole2
  [(set (match_operand:TI 0 "vsx_register_operand")
        (rotate:TI (match_operand:TI 1 "vsx_register_operand")
                   (const_int 64)))
   (set (match_operand:TI 2 "vsx_register_operand")
        (rotate:TI (match_dup 0)
                   (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  DONE;
})

;; Vector constants that can be generated with XXSPLTIB, which was added in
;; ISA 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are
;; recognized.
(define_insn "xxspltib_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
  "TARGET_P9_VECTOR"
{
  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])
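
;; Hedged usage note (added): with ISA 3.0 (-mcpu=power9), a splatted byte
;; constant such as the hypothetical C
;;
;;   vector signed char splat5 (void) { return vec_splats ((signed char) 5); }
;;
;; is expected to be generated as a single "xxspltib %x0,5" by this pattern.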

(define_insn "xxspltib_<mode>_nosplit"
  [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
        (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
  "TARGET_P9_VECTOR"
{
  rtx op1 = operands[1];
  int value = 256;
  int num_insns = -1;

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 1)
    gcc_unreachable ();

  operands[2] = GEN_INT (value & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn_and_split "*xxspltib_<mode>_split"
  [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
        (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
  "TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(const_int 0)]
{
  int value = 256;
  int num_insns = -1;
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = ((can_create_pseudo_p ())
             ? gen_reg_rtx (V16QImode)
             : gen_lowpart (V16QImode, op0));

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 2)
    gcc_unreachable ();

  emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));

  if (<MODE>mode == V2DImode)
    emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));

  else if (<MODE>mode == V4SImode)
    emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));

  else if (<MODE>mode == V8HImode)
    emit_insn (gen_altivec_vupkhsb (op0, tmp));

  else
    gcc_unreachable ();

  DONE;
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])
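
;; Hedged illustration (added): for the wider modes the split above emits an
;; xxspltib of the byte value followed by a sign extension, so a V4SI splat
;; of a small constant, e.g. vec_splats ((int) -5), is expected to become
;; roughly "xxspltib vs,251; vextsb2w vs,vs", two insns, which matches the
;; "length" "8" attribute.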

;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTISB
;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
;; all 1's, since the machine does not have to wait for the previous
;; instruction using the register being set (such as a store waiting on a slow
;; instruction).  But generate XXLXOR/XXLORC if it will avoid a register move.

;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
;;              VSX 0/-1   GPR 0/-1   VMX const GPR const  LVX (VMX)   STVX (VMX)
(define_insn "*vsx_mov<mode>_64bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      <VSa>,     <VSa>,    r,         we,        ?wQ,
                ?&r,       ??r,       ??Y,      <??r>,     wo,        v,
                ?<VSa>,    *r,        v,        ??r,       wZ,        v")

        (match_operand:VSX_M 1 "input_operand"
               "<VSa>,     ZwO,       <VSa>,    we,        r,         r,
                wQ,        Y,         r,        r,         wE,        jwM,
                ?jwM,      jwM,       W,        W,         v,         wZ"))]

  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, mffgpr,   mftgpr,    load,
                store,     load,      store,     *,        vecsimple, vecsimple,
                vecsimple, *,         *,         *,        vecstore,  vecload")
   (set_attr "length"
                4,         8,         20,        20,       4,         4")])

;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
;;              XXSPLTIB   VSPLTISW   VSX 0/-1   GPR 0/-1   VMX const  GPR const
;;              LVX (VMX)  STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      <VSa>,     <VSa>,     ??r,       ??Y,       <??r>,
                wo,        v,         ?<VSa>,    *r,        v,         ??r,

        (match_operand:VSX_M 1 "input_operand"
               "<VSa>,     ZwO,       <VSa>,     Y,         r,         r,
                wE,        jwM,       ?jwM,      jwM,       W,         W,

  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, load,      store,     *,
                vecsimple, vecsimple, vecsimple, *,         *,         *,
   (set_attr "length"
               "4, 4, 4, 16, 16, 16,
                4, 4, 4, 16, 20, 32,

;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand")
        (match_operand:VSX_M 1 "memory_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

(define_expand "vsx_store_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand")
        (match_operand:VSX_M 1 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})
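
;; Hedged usage note (added): these expanders appear to back the
;; vec_vsx_ld/vec_vsx_st style built-in functions, e.g. the hypothetical C
;;
;;   vector float f (const float *p) { return vec_vsx_ld (0, p); }
;;
;; making the endian swap explicit early enough for the later swap
;; optimization pass to clean it up.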

;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
(define_insn "vsx_ld_elemrev_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (vec_select:V2DI
          (match_operand:V2DI 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
        (vec_select:V1TI
          (match_operand:V1TI 1 "memory_operand" "Z")
          (parallel [(const_int 0)])))]
  "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
{
  return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
}
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v2df"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (vec_select:V2DF
          (match_operand:V2DF 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (vec_select:V4SI
          (match_operand:V4SI 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (vec_select:V4SF
          (match_operand:V4SF 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])
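
;; Hedged usage note (added): these element-reversing patterns provide the
;; big-endian element order that the vec_xl_be style built-ins want, e.g. the
;; hypothetical C
;;
;;   vector int ld_be (const int *p) { return vec_xl_be (0, p); }
;;
;; which on little endian must load elements in big-endian element order;
;; lxvd2x/lxvw4x give exactly that behavior without an extra permute.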

(define_expand "vsx_ld_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
                                                pcv));
      DONE;
    }
})

(define_insn "*vsx_ld_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvh8x %x0,%y1"
  [(set_attr "type" "vecload")])
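
;; Added commentary (hedged): the !TARGET_P9_VECTOR fallback in the expander
;; composes two steps: vsx_ld_elemrev_v4si reverses the four 32-bit words,
;; and the vperm control built from reorder[] then fixes up the halfwords
;; within each word, so the combined effect equals the full halfword
;; reversal that lxvh8x performs directly on ISA 3.0.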

(define_expand "vsx_ld_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
                                                 pcv));
      DONE;
    }
})

(define_insn "*vsx_ld_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvb16x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_st_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "memory_operand" "=Z")
        (vec_select:V1TI
          (match_operand:V1TI 1 "vsx_register_operand" "+wa")
          (parallel [(const_int 0)])))
   (clobber (match_dup 1))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
{
  return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
}
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2df"
  [(set (match_operand:V2DF 0 "memory_operand" "=Z")
        (vec_select:V2DF
          (match_operand:V2DF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2di"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
        (vec_select:V2DI
          (match_operand:V2DI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "=Z")
        (vec_select:V4SF
          (match_operand:V4SF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "stxvw4x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4si"
  [(set (match_operand:V4SI 0 "memory_operand" "=Z")
        (vec_select:V4SI
          (match_operand:V4SI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "stxvw4x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_expand "vsx_st_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V8HImode);
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
                                                pcv));
      subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
      DONE;
    }
})

(define_insn "*vsx_st_elemrev_v2di_internal"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
        (vec_select:V2DI
          (match_operand:V2DI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_st_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvh8x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_expand "vsx_st_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (vec_select:V16QI
          (match_operand:V16QI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V16QImode);
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
      int i;

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
                                                 pcv));
      subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
      DONE;
    }
})

(define_insn "*vsx_st_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (vec_select:V16QI
          (match_operand:V16QI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvb16x %x1,%y0"
  [(set_attr "type" "vecstore")])

;; VSX vector floating point arithmetic instructions.  The VSX scalar
;; instructions are now combined with the insn for the traditional floating
;; point operations.
(define_insn "*vsx_add<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvadd<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_sub<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsub<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_mul<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmul<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

; Emulate vector with scalar for vec_mul in V2DImode
(define_insn_and_split "vsx_mul_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_MULSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op5, op3, op4));
  else
    {
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op5, ret);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op3, op3, op4));
  else
    {
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op3, ret);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "mul")])
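
;; Hedged usage note (added): there is no vector doubleword multiply insn
;; here, so a vector long long multiply, e.g. the hypothetical C
;;
;;   vector long long mul2 (vector long long a, vector long long b)
;;   { return a * b; }
;;
;; splits into two element extracts per input, two scalar multiplies (mulld
;; on 64-bit targets), and a vsx_concat_v2di to rebuild the vector, as the
;; code above shows.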

(define_insn "*vsx_div<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvdiv<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_div>")])

; Emulate vector with scalar for vec_div in V2DImode
(define_insn_and_split "vsx_div_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_DIVSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op5, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op5, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op5, target);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op3, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op3, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op3, target);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "div")])

(define_insn_and_split "vsx_udiv_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_DIVUD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_udivdi3 (op5, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (udiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op5, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op5, target);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_udivdi3 (op3, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (udiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op3, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op3, target);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "div")])

;; *tdiv* instruction returning the FG flag
(define_expand "vsx_tdiv<mode>3_fg"
  [(set (match_dup 3)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
                      (match_operand:VSX_B 2 "vsx_register_operand")]
                     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand")
        (gt:SI (match_dup 3)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

;; *tdiv* instruction returning the FE flag
(define_expand "vsx_tdiv<mode>3_fe"
  [(set (match_dup 3)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
                      (match_operand:VSX_B 2 "vsx_register_operand")]
                     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand")
        (eq:SI (match_dup 3)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

(define_insn "*vsx_tdiv<mode>3_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
                      (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
                     UNSPEC_VSX_TDIV))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tdiv<VSs> %0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_fre<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_FRES))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvre<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_neg<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvneg<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_abs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvabs<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_nabs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (neg:VSX_F
         (abs:VSX_F
          (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvnabs<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_smax<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmax<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_smin<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmin<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_sqrt<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsqrt<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_sqrt>")])

(define_insn "*vsx_rsqrte<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_RSQRT))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvrsqrte<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

;; *tsqrt* returning the fg flag
(define_expand "vsx_tsqrt<mode>2_fg"
  [(set (match_dup 2)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
                     UNSPEC_VSX_TSQRT))
   (set (match_operand:SI 0 "gpc_reg_operand")
        (gt:SI (match_dup 2)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})

;; *tsqrt* returning the fe flag
(define_expand "vsx_tsqrt<mode>2_fe"
  [(set (match_dup 2)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
                     UNSPEC_VSX_TSQRT))
   (set (match_operand:SI 0 "gpc_reg_operand")
        (eq:SI (match_dup 2)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})

(define_insn "*vsx_tsqrt<mode>2_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
                     UNSPEC_VSX_TSQRT))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tsqrt<VSs> %0,%x1"
  [(set_attr "type" "<VStype_simple>")])

;; Fused vector multiply/add instructions.  Support the classical Altivec
;; versions of fma, which allows the target to be a separate register from the
;; 3 inputs.  Under VSX, the target must be either the addend or the first
;; multiply operand.
(define_insn "*vsx_fmav4sf4"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
        (fma:V4SF
          (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
          (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
          (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "@
   xvmaddasp %x0,%x1,%x2
   xvmaddmsp %x0,%x1,%x3
   xvmaddasp %x0,%x1,%x2
   xvmaddmsp %x0,%x1,%x3
   vmaddfp %0,%1,%2,%3"
  [(set_attr "type" "vecfloat")])
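
;; Hedged example (added): a fused multiply-add written as the hypothetical C
;;
;;   vector float f (vector float a, vector float b, vector float c)
;;   { return vec_madd (a, b, c); }
;;
;; can use either the xvmaddasp form (target overlaps the addend) or the
;; xvmaddmsp form (target overlaps a multiplicand); the alternatives above
;; let register allocation pick whichever avoids a copy, with vmaddfp as the
;; Altivec fallback that allows four distinct registers.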

(define_insn "*vsx_fmav2df4"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
        (fma:V2DF
          (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
          (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
          (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "@
   xvmaddadp %x0,%x1,%x2
   xvmaddmdp %x0,%x1,%x3
   xvmaddadp %x0,%x1,%x2
   xvmaddmdp %x0,%x1,%x3"
  [(set_attr "type" "vecdouble")])

(define_insn "*vsx_fms<mode>4"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
        (fma:VSX_F
          (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
          (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
          (neg:VSX_F
            (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "@
   xvmsuba<VSs> %x0,%x1,%x2
   xvmsubm<VSs> %x0,%x1,%x3
   xvmsuba<VSs> %x0,%x1,%x2
   xvmsubm<VSs> %x0,%x1,%x3"
  [(set_attr "type" "<VStype_mul>")])

(define_insn "*vsx_nfma<mode>4"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
        (neg:VSX_F
         (fma:VSX_F
          (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
          (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
          (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "@
   xvnmadda<VSs> %x0,%x1,%x2
   xvnmaddm<VSs> %x0,%x1,%x3
   xvnmadda<VSs> %x0,%x1,%x2
   xvnmaddm<VSs> %x0,%x1,%x3"
  [(set_attr "type" "<VStype_mul>")])

(define_insn "*vsx_nfmsv4sf4"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
        (neg:V4SF
         (fma:V4SF
           (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
           (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
           (neg:V4SF
             (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "@
   xvnmsubasp %x0,%x1,%x2
   xvnmsubmsp %x0,%x1,%x3
   xvnmsubasp %x0,%x1,%x2
   xvnmsubmsp %x0,%x1,%x3
   vnmsubfp %0,%1,%2,%3"
  [(set_attr "type" "vecfloat")])

(define_insn "*vsx_nfmsv2df4"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
        (neg:V2DF
         (fma:V2DF
           (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
           (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
           (neg:V2DF
             (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "@
   xvnmsubadp %x0,%x1,%x2
   xvnmsubmdp %x0,%x1,%x3
   xvnmsubadp %x0,%x1,%x2
   xvnmsubmdp %x0,%x1,%x3"
  [(set_attr "type" "vecdouble")])

;; Vector conditional expressions (no scalar version for these instructions)
(define_insn "vsx_eq<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_gt<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_ge<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
2015 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
2016 ;; indicate a combined status
(define_insn "*vsx_eq_<mode>_p"
  [(set (reg:CC CR6_REGNO)
        (unspec:CC
         [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
                 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
         UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (eq:VSX_F (match_dup 1)
                  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<VSs>. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_gt_<mode>_p"
  [(set (reg:CC CR6_REGNO)
        (unspec:CC
         [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
                 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
         UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (gt:VSX_F (match_dup 1)
                  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<VSs>. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_ge_<mode>_p"
  [(set (reg:CC CR6_REGNO)
        (unspec:CC
         [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
                 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
         UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (ge:VSX_F (match_dup 1)
                  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<VSs>. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
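
;; For illustration only (this comment block is an addition; the built-in
;; names come from altivec.h rather than this file): the predicate forms
;; above are what back the vec_all_*/vec_any_* comparisons, which read CR6
;; in addition to producing the mask, e.g.:
;;
;;   #include <altivec.h>
;;   int all_equal (vector double a, vector double b)
;;   {
;;     return vec_all_eq (a, b);   /* xvcmpeqdp. plus a CR6 test.  */
;;   }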

(define_insn "*vsx_xxsel<mode>"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (if_then_else:VSX_L
         (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
                (match_operand:VSX_L 4 "zero_constant" ""))
         (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
         (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsel %x0,%x3,%x2,%x1"
  [(set_attr "type" "vecmove")])

(define_insn "*vsx_xxsel<mode>_uns"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (if_then_else:VSX_L
         (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
                   (match_operand:VSX_L 4 "zero_constant" ""))
         (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
         (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsel %x0,%x3,%x2,%x1"
  [(set_attr "type" "vecmove")])
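
;; Usage sketch (an illustrative addition; assumes the usual altivec.h
;; overloads): vec_sel picks bits from its first two arguments under the
;; mask, which these patterns implement with a single xxsel:
;;
;;   vector double select (vector double a, vector double b,
;;                         vector bool long long mask)
;;   {
;;     return vec_sel (a, b, mask);   /* expected to emit xxsel.  */
;;   }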

(define_insn "vsx_copysign<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F
         [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
          (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
         UNSPEC_COPYSIGN))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcpsgn<VSs> %x0,%x2,%x1"
  [(set_attr "type" "<VStype_simple>")])
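
;; Rough example (an added note, not from the original file): vec_cpsgn is
;; the usual route to this pattern; note the operand swap in the template
;; above, which matches the RTL copysign operand convention:
;;
;;   vector double r = vec_cpsgn (a, b);   /* one xvcpsgndp.  */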

;; For the conversions, limit the register class for the integer value to be
;; the fprs because we don't want to add the altivec registers to movdi/movsi.
;; For the unsigned tests, there isn't a generic double -> unsigned conversion
;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
;; in allowing virtual registers.
(define_insn "vsx_float<VSi><mode>2"
  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
        (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcvsx<VSc><VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_floatuns<VSi><mode>2"
  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
        (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcvux<VSc><VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_fix_trunc<mode><VSi>2"
  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
        (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_fixuns_trunc<mode><VSi>2"
  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
        (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])
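
;; Illustrative only (added comment; vec_cts/vec_ctf are altivec.h built-ins,
;; not defined here): elementwise float<->int casts funnel into the four
;; patterns above, e.g.:
;;
;;   vector signed int i = vec_cts (vf, 0);   /* xvcvspsxws  */
;;   vector float f = vec_ctf (vi, 0);        /* xvcvsxwsp   */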

;; Math rounding functions
(define_insn "vsx_x<VSv>r<VSs>i"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_VSX_ROUND_I))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<VSs>i %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_x<VSv>r<VSs>ic"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_VSX_ROUND_IC))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<VSs>ic %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_btrunc<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<VSs>iz %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_b2trunc<mode>2"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_FRIZ))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<VSs>iz %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_floor<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_FRIM))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<VSs>im %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_ceil<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_FRIP))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<VSs>ip %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])
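
;; Example usage (a sketch added for clarity; built-in names assumed from
;; altivec.h): the truncate/floor/ceiling patterns above map onto
;;
;;   vector double t = vec_trunc (v);   /* xvrdpiz  */
;;   vector double f = vec_floor (v);   /* xvrdpim  */
;;   vector double c = vec_ceil (v);    /* xvrdpip  */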

;; VSX convert to/from double vector

;; Convert between single and double precision
;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
;; scalar single precision instructions internally use the double format.
;; Prefer the altivec registers, since we likely will need to do a vperm
(define_insn "vsx_<VS_spdp_insn>"
  [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
        (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
                              UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "<VS_spdp_insn> %x0,%x1"
  [(set_attr "type" "<VS_spdp_type>")])

;; xscvspdp, represent the scalar SF type as V4SF
(define_insn "vsx_xscvspdp"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
        (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvspdp %x0,%x1"
  [(set_attr "type" "fp")])

;; Same as vsx_xscvspdp, but use SF as the type
(define_insn "vsx_xscvspdp_scalar2"
  [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
        (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvspdp %x0,%x1"
  [(set_attr "type" "fp")])

;; Generate xvcvhpsp instruction
(define_insn "vsx_xvcvhpsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVHPSP))]
  "TARGET_P9_VECTOR"
  "xvcvhpsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
;; format of scalars is actually DF.
(define_insn "vsx_xscvdpsp_scalar"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
                     UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvdpsp %x0,%x1"
  [(set_attr "type" "fp")])

;; ISA 2.07 xscvdpspn/xscvspdpn that do not raise an error on signalling NaNs
(define_insn "vsx_xscvdpspn"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww")
        (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "ws")]
                     UNSPEC_VSX_CVDPSPN))]
  "TARGET_XSCVDPSPN"
  "xscvdpspn %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xscvspdpn"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
        (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDPN))]
  "TARGET_XSCVSPDPN"
  "xscvspdpn %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xscvdpspn_scalar"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
                     UNSPEC_VSX_CVDPSPN))]
  "TARGET_XSCVDPSPN"
  "xscvdpspn %x0,%x1"
  [(set_attr "type" "fp")])

;; Used by direct move to move a SFmode value from GPR to VSX register
(define_insn "vsx_xscvspdpn_directmove"
  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
        (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDPN))]
  "TARGET_XSCVSPDPN"
  "xscvspdpn %x0,%x1"
  [(set_attr "type" "fp")])

;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)

(define_expand "vsx_xvcvsxddp_scale"
  [(match_operand:V2DF 0 "vsx_register_operand")
   (match_operand:V2DI 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
  emit_insn (gen_vsx_xvcvsxddp (op0, op1));
  if (scale != 0)
    rs6000_scale_v2df (op0, op0, -scale);
  DONE;
})
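
;; Worked example (added; assumes the documented vec_ctf scaling semantics):
;; converting V2DI to V2DF with a scale factor of 3 goes through the expander
;; above, emitting xvcvsxddp and then a multiply by 2**-3:
;;
;;   vector double d = vec_ctf (vll, 3);   /* d[i] = (double) vll[i] / 8.0  */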

(define_insn "vsx_xvcvsxddp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVSXDDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxddp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvuxddp_scale"
  [(match_operand:V2DF 0 "vsx_register_operand")
   (match_operand:V2DI 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
  emit_insn (gen_vsx_xvcvuxddp (op0, op1));
  if (scale != 0)
    rs6000_scale_v2df (op0, op0, -scale);
  DONE;
})

(define_insn "vsx_xvcvuxddp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVUXDDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxddp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvdpsxds_scale"
  [(match_operand:V2DI 0 "vsx_register_operand")
   (match_operand:V2DF 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp;
  int scale = INTVAL (operands[2]);
  if (scale == 0)
    tmp = op1;
  else
    {
      tmp = gen_reg_rtx (V2DFmode);
      rs6000_scale_v2df (tmp, op1, scale);
    }
  emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
  DONE;
})

;; convert vector of 64-bit floating point numbers to vector of
;; 64-bit signed integer
(define_insn "vsx_xvcvdpsxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVDPSXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; convert vector of 32-bit floating point numbers to vector of
;; 32-bit signed integer
(define_insn "vsx_xvcvspsxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVSPSXWS))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvspsxws %x0,%x1"
  [(set_attr "type" "vecfloat")])

;; convert vector of 64-bit floating point numbers to vector of
;; 64-bit unsigned integer
(define_expand "vsx_xvcvdpuxds_scale"
  [(match_operand:V2DI 0 "vsx_register_operand")
   (match_operand:V2DF 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp;
  int scale = INTVAL (operands[2]);
  if (scale == 0)
    tmp = op1;
  else
    {
      tmp = gen_reg_rtx (V2DFmode);
      rs6000_scale_v2df (tmp, op1, scale);
    }
  emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
  DONE;
})

;; convert vector of 32-bit floating point numbers to vector of
;; 32-bit unsigned integer
(define_insn "vsx_xvcvspuxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVSPUXWS))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvspuxws %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvdpuxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVDPUXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpuxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Convert from 64-bit to 32-bit types
;; Note, favor the Altivec registers since the usual use of these instructions
;; is in vector converts and we need to use the Altivec vperm instruction.

(define_insn "vsx_xvcvdpsxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
                     UNSPEC_VSX_CVDPSXWS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsxws %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvdpuxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
                     UNSPEC_VSX_CVDPUXWS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpuxws %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsxdsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
                     UNSPEC_VSX_CVSXDSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxdsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvuxdsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
                     UNSPEC_VSX_CVUXDSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxdsp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcdpsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
                     UNSPEC_VSX_XVCDPSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsp %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Convert from 32-bit to 64-bit types
;; Provide both vector and scalar targets
(define_insn "vsx_xvcvsxwdp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
                     UNSPEC_VSX_CVSXWDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsxwdp_df"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
        (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSXWDP))]
  "TARGET_VSX"
  "xvcvsxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvuxwdp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
                     UNSPEC_VSX_CVUXWDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvuxwdp_df"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
        (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVUXWDP))]
  "TARGET_VSX"
  "xvcvuxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvspsxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
                     UNSPEC_VSX_CVSPSXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvspsxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvspuxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
                     UNSPEC_VSX_CVSPUXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvspuxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsxwsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVSXWSP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvsxwsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvuxwsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVUXWSP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvuxwsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

;; Generate float2 double
;; convert two double to float
(define_expand "float2_v2df"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DF 1 "register_operand" "wa"))
   (use (match_operand:V2DF 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate float2
;; convert two long long signed ints to float
(define_expand "float2_v2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))
   (use (match_operand:V2DI 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate uns_float2
;; convert two long long unsigned ints to float
(define_expand "uns_float2_v2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))
   (use (match_operand:V2DI 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})
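
;; Usage sketch (added; built-in names assumed from the PowerPC vector API):
;; vec_float2 packs two 2-element conversions into one V4SF result, with the
;; overload on unsigned inputs routed through the uns_float2 expander:
;;
;;   vector float r = vec_float2 (da, db);   /* two V2DF -> one V4SF  */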

;; Generate floate
;; convert double or long long signed to float
;; (Only even words are valid, BE numbering)
(define_expand "floate<mode>"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Shift left one word to put the even words in the correct location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
                 rtx_tmp, rtx_tmp, rtx_val));
    }
  else
    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
  DONE;
})

;; Generate uns_floate
;; convert long long unsigned to float
;; (Only even words are valid, BE numbering)
(define_expand "unsfloatev2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Shift left one word to put the even words in the correct location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
                 rtx_tmp, rtx_tmp, rtx_val));
    }
  else
    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
  DONE;
})

;; Generate floato
;; convert double or long long signed to float
;; (Only odd words are valid, BE numbering)
(define_expand "floato<mode>"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
  else
    {
      /* Shift left one word to put the odd words in the correct location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
                 rtx_tmp, rtx_tmp, rtx_val));
    }
  DONE;
})

;; Generate uns_floato
;; convert long long unsigned to float
;; (Only odd words are valid, BE numbering)
(define_expand "unsfloatov2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
  else
    {
      /* Shift left one word to put the odd words in the correct location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
                 rtx_tmp, rtx_tmp, rtx_val));
    }
  DONE;
})

;; Generate vsigned2
;; convert two double float vectors to a vector of single precision ints
(define_expand "vsigned2_v2df"
  [(match_operand:V4SI 0 "register_operand" "=wa")
   (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
                 (match_operand:V2DF 2 "register_operand" "wa")]
                UNSPEC_VSX_VSIGNED2)]
  "TARGET_VSX"
{
  rtx rtx_src1, rtx_src2, rtx_dst;
  bool signed_convert = true;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate vsignedo_v2df
;; signed double float to int convert odd word
(define_expand "vsignedo_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=wa")
        (match_operand:V2DF 1 "register_operand" "wa"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));

      /* Big endian word numbering for words in operand is 0 1 2 3.
         Take (operand[1] operand[1]) and shift left one word
         0 1 2 3  0 1 2 3  =>  1 2 3 0
         Words 1 and 3 are now where they need to be for the result.  */

      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
                                          rtx_tmp, rtx_val));
    }
  else
    {
      /* Little endian word numbering for operand is 3 2 1 0.
         Result words 3 and 1 are where they need to be.  */
      emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
    }
  DONE;
}
  [(set_attr "type" "veccomplex")])

;; Generate vsignede_v2df
;; signed double float to int even word
(define_expand "vsignede_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Result words 0 and 2 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
  else
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));

      /* Little endian word numbering for operand is 3 2 1 0.
         Take (operand[1] operand[1]) and shift left three words
         0 1 2 3  0 1 2 3  =>  3 0 1 2
         Words 0 and 2 are now where they need to be for the result.  */
      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
                                          rtx_tmp, rtx_val));
    }
  DONE;
}
  [(set_attr "type" "veccomplex")])

;; Generate vunsigned2
;; convert two double float vectors to a vector of single precision
;; unsigned ints
(define_expand "vunsigned2_v2df"
  [(match_operand:V4SI 0 "register_operand" "=v")
   (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
                 (match_operand:V2DF 2 "register_operand" "v")]
                UNSPEC_VSX_VSIGNED2)]
  "TARGET_VSX"
{
  rtx rtx_src1, rtx_src2, rtx_dst;
  bool signed_convert = false;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate vunsignedo_v2df
;; unsigned double float to int convert odd word
(define_expand "vunsignedo_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));

      /* Big endian word numbering for words in operand is 0 1 2 3.
         Take (operand[1] operand[1]) and shift left one word
         0 1 2 3  0 1 2 3  =>  1 2 3 0
         Words 1 and 3 are now where they need to be for the result.  */

      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
                                          rtx_tmp, rtx_val));
    }
  else
    {
      /* Little endian word numbering for operand is 3 2 1 0.
         Result words 3 and 1 are where they need to be.  */
      emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
    }
  DONE;
}
  [(set_attr "type" "veccomplex")])

;; Generate vunsignede_v2df
;; unsigned double float to int even word
(define_expand "vunsignede_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Result words 0 and 2 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
  else
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));

      /* Little endian word numbering for operand is 3 2 1 0.
         Take (operand[1] operand[1]) and shift left three words
         0 1 2 3  0 1 2 3  =>  3 0 1 2
         Words 0 and 2 are now where they need to be for the result.  */
      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
                                          rtx_tmp, rtx_val));
    }
  DONE;
}
  [(set_attr "type" "veccomplex")])
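
;; Usage sketch (added; assumed built-in names): vec_signed2/vec_unsigned2
;; combine two V2DF conversions into one V4SI, while the "e"/"o" variants
;; convert a single V2DF into the even or odd words:
;;
;;   vector signed int s = vec_signed2 (da, db);
;;   vector unsigned int u = vec_unsignede (da);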

;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
;; the xvrdpiz instruction does not truncate the value if the floating point
;; value is < LONG_MIN or > LONG_MAX.
(define_insn "*vsx_float_fix_v2df2"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
        (float:V2DF
         (fix:V2DI
          (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
   && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
   && !flag_trapping_math && TARGET_FRIZ"
  "xvrdpiz %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Permute operations

;; Build a V2DF/V2DI vector from two scalars
(define_insn "vsx_concat_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
        (vec_concat:VSX_D
         (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
         (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (which_alternative == 0)
    return (BYTES_BIG_ENDIAN
            ? "xxpermdi %x0,%x1,%x2,0"
            : "xxpermdi %x0,%x2,%x1,0");

  else if (which_alternative == 1)
    return (BYTES_BIG_ENDIAN
            ? "mtvsrdd %x0,%1,%2"
            : "mtvsrdd %x0,%2,%1");

  else
    gcc_unreachable ();
}
  [(set_attr "type" "vecperm")])
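
;; For illustration (an added note): a vector constructor such as
;;
;;   vector double make (double a, double b)
;;   {
;;     return (vector double) { a, b };
;;   }
;;
;; is expected to match this pattern, giving a single xxpermdi, or mtvsrdd
;; when both inputs are in GPRs on ISA 3.0 (the second alternative).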

;; Combiner patterns to allow creating XXPERMDI's to access either double
;; word element in a vector register.
(define_insn "*vsx_concat_<mode>_1"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_concat:VSX_D
         (vec_select:<VS_scalar>
          (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
          (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
         (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword = INTVAL (operands[2]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[4] = GEN_INT (2 * dword);
      return "xxpermdi %x0,%x1,%x3,%4";
    }
  else
    {
      operands[4] = GEN_INT (!dword);
      return "xxpermdi %x0,%x3,%x1,%4";
    }
}
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_concat_<mode>_2"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_concat:VSX_D
         (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
         (vec_select:<VS_scalar>
          (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
          (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword = INTVAL (operands[3]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[4] = GEN_INT (dword);
      return "xxpermdi %x0,%x1,%x2,%4";
    }
  else
    {
      operands[4] = GEN_INT (2 * !dword);
      return "xxpermdi %x0,%x2,%x1,%4";
    }
}
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_concat_<mode>_3"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_concat:VSX_D
         (vec_select:<VS_scalar>
          (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
          (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
         (vec_select:<VS_scalar>
          (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
          (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword1 = INTVAL (operands[2]);
  HOST_WIDE_INT dword2 = INTVAL (operands[4]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[5] = GEN_INT ((2 * dword1) + dword2);
      return "xxpermdi %x0,%x1,%x3,%5";
    }
  else
    {
      operands[5] = GEN_INT ((2 * !dword2) + !dword1);
      return "xxpermdi %x0,%x3,%x1,%5";
    }
}
  [(set_attr "type" "vecperm")])

;; Special purpose concat using xxpermdi to glue two single precision values
;; together, relying on the fact that internally scalar floats are represented
;; as doubles.  This is used to initialize a V4SF vector with 4 floats
(define_insn "vsx_concat_v2sf"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (unspec:V2DF
         [(match_operand:SF 1 "vsx_register_operand" "ww")
          (match_operand:SF 2 "vsx_register_operand" "ww")]
         UNSPEC_VSX_CONCAT))]
  "VECTOR_MEM_VSX_P (V2DFmode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,0";
  else
    return "xxpermdi %x0,%x2,%x1,0";
}
  [(set_attr "type" "vecperm")])

;; V4SImode initialization splitter
(define_insn_and_split "vsx_init_v4si"
  [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
        (unspec:V4SI
         [(match_operand:SI 1 "reg_or_cint_operand" "rn")
          (match_operand:SI 2 "reg_or_cint_operand" "rn")
          (match_operand:SI 3 "reg_or_cint_operand" "rn")
          (match_operand:SI 4 "reg_or_cint_operand" "rn")]
         UNSPEC_VSX_VEC_INIT))
   (clobber (match_scratch:DI 5 "=&r"))
   (clobber (match_scratch:DI 6 "=&r"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_v4si_init (operands);
  DONE;
})

;; xxpermdi for little endian loads and stores.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_xxpermdi2_le_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
        (vec_select:VSX_D
         (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
         (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_xxpermdi4_le_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
        (vec_select:VSX_W
         (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_xxpermdi8_le_V8HI"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
         (match_operand:V8HI 1 "vsx_register_operand" "wa")
         (parallel [(const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_xxpermdi16_le_V16QI"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
         (match_operand:V16QI 1 "vsx_register_operand" "wa")
         (parallel [(const_int 8) (const_int 9)
                    (const_int 10) (const_int 11)
                    (const_int 12) (const_int 13)
                    (const_int 14) (const_int 15)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)
                    (const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

;; lxvd2x for little endian loads.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_lxvd2x2_le_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
        (vec_select:VSX_D
         (match_operand:VSX_D 1 "memory_operand" "Z")
         (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x4_le_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
        (vec_select:VSX_W
         (match_operand:VSX_W 1 "memory_operand" "Z")
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x8_le_V8HI"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
         (match_operand:V8HI 1 "memory_operand" "Z")
         (parallel [(const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x16_le_V16QI"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
         (match_operand:V16QI 1 "memory_operand" "Z")
         (parallel [(const_int 8) (const_int 9)
                    (const_int 10) (const_int 11)
                    (const_int 12) (const_int 13)
                    (const_int 14) (const_int 15)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)
                    (const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

;; stxvd2x for little endian stores.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_stxvd2x2_le_<mode>"
  [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
        (vec_select:VSX_D
         (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
         (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x4_le_<mode>"
  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
        (vec_select:VSX_W
         (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x8_le_V8HI"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
         (match_operand:V8HI 1 "vsx_register_operand" "wa")
         (parallel [(const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x16_le_V16QI"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (vec_select:V16QI
         (match_operand:V16QI 1 "vsx_register_operand" "wa")
         (parallel [(const_int 8) (const_int 9)
                    (const_int 10) (const_int 11)
                    (const_int 12) (const_int 13)
                    (const_int 14) (const_int 15)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)
                    (const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

;; Convert a TImode value into V1TImode
(define_expand "vsx_set_v1ti"
  [(match_operand:V1TI 0 "nonimmediate_operand")
   (match_operand:V1TI 1 "nonimmediate_operand")
   (match_operand:TI 2 "input_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (V1TImode)"
{
  if (operands[3] != const0_rtx)
    gcc_unreachable ();

  emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
  DONE;
})

;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
(define_expand "vsx_set_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
   (use (match_operand:QI 3 "const_0_to_1_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx dest = operands[0];
  rtx vec_reg = operands[1];
  rtx value = operands[2];
  rtx ele = operands[3];
  rtx tmp = gen_reg_rtx (<VS_scalar>mode);

  if (ele == const0_rtx)
    {
      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
      emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
      DONE;
    }
  else if (ele == const1_rtx)
    {
      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
      emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
      DONE;
    }
  else
    gcc_unreachable ();
})

;; Extract a DF/DI element from V2DF/V2DI
;; Optimize cases where we can do a simple or direct move.
;; Or see if we can avoid doing the move at all

;; There are some unresolved problems with reload that show up if an Altivec
;; register was picked.  Limit the scalar value to FPRs for now.

(define_insn "vsx_extract_<mode>"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
        (vec_select:<VS_scalar>
         (match_operand:VSX_D 1 "gpc_reg_operand" "<VSa>, <VSa>, wm, wo")
         (parallel
          [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  int element = INTVAL (operands[2]);
  int op0_regno = REGNO (operands[0]);
  int op1_regno = REGNO (operands[1]);
  int fldDM;

  gcc_assert (IN_RANGE (element, 0, 1));
  gcc_assert (VSX_REGNO_P (op1_regno));

  if (element == VECTOR_ELEMENT_SCALAR_64BIT)
    {
      if (op0_regno == op1_regno)
        return ASM_COMMENT_START " vec_extract to same register";

      else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
               && TARGET_POWERPC64)
        return "mfvsrd %0,%x1";

      else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
        return "fmr %0,%1";

      else if (VSX_REGNO_P (op0_regno))
        return "xxlor %x0,%x1,%x1";

      else
        gcc_unreachable ();
    }

  else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
           && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
    return "mfvsrld %0,%x1";

  else if (VSX_REGNO_P (op0_regno))
    {
      fldDM = element << 1;
      if (!BYTES_BIG_ENDIAN)
        fldDM = 3 - fldDM;
      operands[3] = GEN_INT (fldDM);
      return "xxpermdi %x0,%x1,%x1,%3";
    }

  else
    gcc_unreachable ();
}
  [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")])
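
;; Example (added; vec_extract is the generic built-in): an extraction like
;;
;;   double first (vector double v)
;;   {
;;     return vec_extract (v, 0);
;;   }
;;
;; matches this pattern; depending on where the operands were allocated it
;; becomes a no-op comment, mfvsrd, fmr, xxlor, or xxpermdi.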

;; Optimize extracting a single scalar element from memory.
(define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr")
        (vec_select:<VSX_D:VS_scalar>
         (match_operand:VSX_D 1 "memory_operand" "m,m")
         (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
   (clobber (match_scratch:P 3 "=&b,&b"))]
  "VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
                                           operands[3], <VSX_D:VS_scalar>mode);
}
  [(set_attr "type" "fpload,load")
   (set_attr "length" "8")])

;; Optimize storing a single scalar element that is the right location to
;; store
(define_insn "*vsx_extract_<mode>_store"
  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
        (vec_select:<VS_scalar>
         (match_operand:VSX_D 1 "register_operand" "d,wv,wb")
         (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   stfd%U0%X0 %1,%0
   stxsdx %x1,%y0
   stxsd %1,%0"
  [(set_attr "type" "fpstore")
   (set_attr "length" "4")])

;; Variable V2DI/V2DF extract shift
(define_insn "vsx_vslo_<mode>"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
        (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
                             (match_operand:V2DI 2 "gpc_reg_operand" "v")]
                            UNSPEC_VSX_VSLO))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "vslo %0,%1,%2"
  [(set_attr "type" "vecperm")])

;; Variable V2DI/V2DF extract
(define_insn_and_split "vsx_extract_<mode>_var"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r")
        (unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
                             (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
                            UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r,&b,&b"))
   (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
                                operands[3], operands[4]);
  DONE;
})

;; Extract a SF element from V4SF
(define_insn_and_split "vsx_extract_v4sf"
  [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
        (vec_select:SF
         (match_operand:V4SF 1 "vsx_register_operand" "wa")
         (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
   (clobber (match_scratch:V4SF 3 "=0"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = operands[3];
  rtx tmp;
  HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);

  if (ele == 0)
    tmp = op1;
  else
    {
      if (GET_CODE (op3) == SCRATCH)
        op3 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
      tmp = op3;
    }
  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "fp")])

(define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
  [(set (match_operand:SF 0 "register_operand" "=f,wv,wb,?r")
        (vec_select:SF
         (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
         (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
   (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
  "VECTOR_MEM_VSX_P (V4SFmode)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
                                           operands[3], SFmode);
}
  [(set_attr "type" "fpload,fpload,fpload,load")
   (set_attr "length" "8")])

;; Variable V4SF extract
(define_insn_and_split "vsx_extract_v4sf_var"
  [(set (match_operand:SF 0 "gpc_reg_operand" "=ww,ww,?r")
        (unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
                    (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
                   UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r,&b,&b"))
   (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
                                operands[3], operands[4]);
  DONE;
})

;; Expand the builtin form of xxpermdi to canonical rtl.
(define_expand "vsx_xxpermdi_<mode>"
  [(match_operand:VSX_L 0 "vsx_register_operand")
   (match_operand:VSX_L 1 "vsx_register_operand")
   (match_operand:VSX_L 2 "vsx_register_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  int mask = INTVAL (operands[3]);
  rtx perm0 = GEN_INT ((mask >> 1) & 1);
  rtx perm1 = GEN_INT ((mask & 1) + 2);
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);

  if (<MODE>mode == V2DFmode)
    gen = gen_vsx_xxpermdi2_v2df_1;
  else
    {
      gen = gen_vsx_xxpermdi2_v2di_1;
      if (<MODE>mode != V2DImode)
        {
          target = gen_lowpart (V2DImode, target);
          op0 = gen_lowpart (V2DImode, op0);
          op1 = gen_lowpart (V2DImode, op1);
        }
    }
  emit_insn (gen (target, op0, op1, perm0, perm1));
  DONE;
})

;; Special version of xxpermdi that retains big-endian semantics.
(define_expand "vsx_xxpermdi_<mode>_be"
  [(match_operand:VSX_L 0 "vsx_register_operand")
   (match_operand:VSX_L 1 "vsx_register_operand")
   (match_operand:VSX_L 2 "vsx_register_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  int mask = INTVAL (operands[3]);
  rtx perm0 = GEN_INT ((mask >> 1) & 1);
  rtx perm1 = GEN_INT ((mask & 1) + 2);
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);

  if (<MODE>mode == V2DFmode)
    gen = gen_vsx_xxpermdi2_v2df_1;
  else
    {
      gen = gen_vsx_xxpermdi2_v2di_1;
      if (<MODE>mode != V2DImode)
        {
          target = gen_lowpart (V2DImode, target);
          op0 = gen_lowpart (V2DImode, op0);
          op1 = gen_lowpart (V2DImode, op1);
        }
    }

  /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
     transformation we don't want; it is necessary for
     rs6000_expand_vec_perm_const_1 but not for this use.  So we
     prepare for that by reversing the transformation here.  */
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen (target, op0, op1, perm0, perm1));
  else
    {
      rtx p0 = GEN_INT (3 - INTVAL (perm1));
      rtx p1 = GEN_INT (3 - INTVAL (perm0));
      emit_insn (gen (target, op1, op0, p0, p1));
    }
  DONE;
})

(define_insn "vsx_xxpermdi2_<mode>_1"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
        (vec_select:VSX_D
          (vec_concat:<VS_double>
            (match_operand:VSX_D 1 "vsx_register_operand" "wd")
            (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
          (parallel [(match_operand 3 "const_0_to_1_operand" "")
                     (match_operand 4 "const_2_to_3_operand" "")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  int op3, op4, mask;

  /* For little endian, swap operands and invert/swap selectors
     to get the correct xxpermdi.  The operand swap sets up the
     inputs as a little endian array.  The selectors are swapped
     because they are defined to use big endian ordering.  The
     selectors are inverted to get the correct doublewords for
     little endian ordering.  */
  if (BYTES_BIG_ENDIAN)
    {
      op3 = INTVAL (operands[3]);
      op4 = INTVAL (operands[4]);
    }
  else
    {
      op3 = 3 - INTVAL (operands[4]);
      op4 = 3 - INTVAL (operands[3]);
    }

  mask = (op3 << 1) | (op4 - 2);
  operands[3] = GEN_INT (mask);

  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,%3";
  else
    return "xxpermdi %x0,%x2,%x1,%3";
}
  [(set_attr "type" "vecperm")])

;; Extraction of a single element in a small integer vector.  Until ISA 3.0,
;; none of the small types were allowed in a vector register, so we had to
;; extract to a DImode and either do a direct move or store.
(define_expand "vsx_extract_<mode>"
  [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
                   (vec_select:<VS_scalar>
                    (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
                    (parallel [(match_operand:QI 2 "const_int_operand")])))
              (clobber (match_scratch:VSX_EXTRACT_I 3))])]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
{
  /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
  if (TARGET_P9_VECTOR)
    {
      emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
                                            operands[2]));
      DONE;
    }
})

(define_insn "vsx_extract_<mode>_p9"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
        (vec_select:<VS_scalar>
         (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
         (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
   (clobber (match_scratch:SI 3 "=r,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
{
  if (which_alternative == 0)
    return "#";

  else
    {
      HOST_WIDE_INT elt = INTVAL (operands[2]);
      HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
                               ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
                               : elt);

      HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
      HOST_WIDE_INT offset = unit_size * elt_adj;

      operands[2] = GEN_INT (offset);
      if (unit_size == 4)
        return "xxextractuw %x0,%x1,%2";
      else
        return "vextractu<wd> %0,%1,%2";
    }
}
  [(set_attr "type" "vecsimple")])
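
;; Example (added note): on ISA 3.0 an element extraction such as
;;
;;   int third (vector signed char v)
;;   {
;;     return vec_extract (v, 3);
;;   }
;;
;; can use the vextub{l,r}x/vextractub family directly instead of the older
;; splat-and-direct-move sequence handled by the pre-ISA 3.0 patterns below.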

(define_split
  [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
        (vec_select:<VS_scalar>
         (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
         (parallel [(match_operand:QI 2 "const_int_operand")])))
   (clobber (match_operand:SI 3 "int_reg_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
  [(const_int 0)]
{
  rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = operands[3];
  HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);

  emit_move_insn (op3, GEN_INT (offset));
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
  else
    emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
  DONE;
})

;; Optimize zero extracts to eliminate the AND after the extract.
(define_insn_and_split "*vsx_extract_<mode>_di_p9"
  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
        (zero_extend:DI
         (vec_select:<VS_scalar>
          (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
          (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
   (clobber (match_scratch:SI 3 "=r,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 4)
                   (vec_select:<VS_scalar>
                    (match_dup 1)
                    (parallel [(match_dup 2)])))
              (clobber (match_dup 3))])]
{
  operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
})

;; Optimize stores to use the ISA 3.0 scalar store instructions
(define_insn_and_split "*vsx_extract_<mode>_store_p9"
  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
        (vec_select:<VS_scalar>
         (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
         (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
   (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
   (clobber (match_scratch:SI 4 "=X,&r"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 3)
                   (vec_select:<VS_scalar>
                    (match_dup 1)
                    (parallel [(match_dup 2)])))
              (clobber (match_dup 4))])
   (set (match_dup 0)
        (match_dup 3))])

(define_insn_and_split "*vsx_extract_si"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
        (vec_select:SI
         (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
         (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
   (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx vec_tmp = operands[3];
  int value;

  if (!BYTES_BIG_ENDIAN)
    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (value != 1)
    emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
  else
    vec_tmp = src;

  if (MEM_P (operands[0]))
    {
      if (can_create_pseudo_p ())
        dest = rs6000_address_for_fpconvert (dest);

      if (TARGET_P8_VECTOR)
        emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
      else
        emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
    }

  else if (TARGET_P8_VECTOR)
    emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
  else
    emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
                    gen_rtx_REG (DImode, REGNO (vec_tmp)));

  DONE;
}
  [(set_attr "type" "mftgpr,vecperm,fpstore")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_extract_<mode>_p8"
  [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
        (vec_select:<VS_scalar>
         (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
         (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
   (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && !TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx vec_tmp = operands[3];
  int value;

  if (!BYTES_BIG_ENDIAN)
    element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (<MODE>mode == V16QImode)
    {
      if (value != 7)
        emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
      else
        vec_tmp = src;
    }
  else if (<MODE>mode == V8HImode)
    {
      if (value != 3)
        emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
      else
        vec_tmp = src;
    }
  else
    gcc_unreachable ();

  emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
                  gen_rtx_REG (DImode, REGNO (vec_tmp)));
  DONE;
}
  [(set_attr "type" "mftgpr")])

;; Optimize extracting a single scalar element from memory.
(define_insn_and_split "*vsx_extract_<mode>_load"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
        (vec_select:<VS_scalar>
         (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
         (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
   (clobber (match_scratch:DI 3 "=&b"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
                                           operands[3], <VS_scalar>mode);
}
  [(set_attr "type" "load")
   (set_attr "length" "8")])

;; Variable V16QI/V8HI/V4SI extract
(define_insn_and_split "vsx_extract_<mode>_var"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
        (unspec:<VS_scalar>
         [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
          (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
         UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r,r,&b"))
   (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
                                operands[3], operands[4]);
  DONE;
})

(define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var"
  [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r")
        (zero_extend:SDI
         (unspec:<VSX_EXTRACT_I:VS_scalar>
          [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
           (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
          UNSPEC_VSX_EXTRACT)))
   (clobber (match_scratch:DI 3 "=r,r,&b"))
   (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  machine_mode smode = <VSX_EXTRACT_I:MODE>mode;
  rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
                                operands[1], operands[2],
                                operands[3], operands[4]);
  DONE;
})

;; VSX_EXTRACT optimizations
;; Optimize double d = (double) vec_extract (vi, <n>)
;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
(define_insn_and_split "*vsx_extract_si_<uns>float_df"
  [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
        (any_float:DF
         (vec_select:SI
          (match_operand:V4SI 1 "gpc_reg_operand" "v")
          (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
   (clobber (match_scratch:V4SI 3 "=v"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx v4si_tmp = operands[3];
  int value;

  if (!BYTES_BIG_ENDIAN)
    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (value != 0)
    {
      if (GET_CODE (v4si_tmp) == SCRATCH)
        v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
    }
  else
    v4si_tmp = src;

  emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
  DONE;
})
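
;; Worked example (added): the combination handled above,
;;
;;   double d (vector signed int v)
;;   {
;;     return (double) vec_extract (v, 0);
;;   }
;;
;; splits into a vspltw (when the element is not already in place) followed
;; by xvcvsxwdp, avoiding a round trip through the GPRs.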

;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
;; where <type> is a floating point type supported by the hardware that is
;; not double.  First convert the value to double, and then to the desired
;; type.
(define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
  [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
        (any_float:VSX_EXTRACT_FL
         (vec_select:SI
          (match_operand:V4SI 1 "gpc_reg_operand" "v")
          (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
   (clobber (match_scratch:V4SI 3 "=v"))
   (clobber (match_scratch:DF 4 "=ws"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx v4si_tmp = operands[3];
  rtx df_tmp = operands[4];
  int value;

  if (!BYTES_BIG_ENDIAN)
    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (value != 0)
    {
      if (GET_CODE (v4si_tmp) == SCRATCH)
        v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
    }
  else
    v4si_tmp = src;

  if (GET_CODE (df_tmp) == SCRATCH)
    df_tmp = gen_reg_rtx (DFmode);

  emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));

  if (<MODE>mode == SFmode)
    emit_insn (gen_truncdfsf2 (dest, df_tmp));
  else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
    emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
  else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
           && TARGET_FLOAT128_HW)
    emit_insn (gen_extenddftf2_hw (dest, df_tmp));
  else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
    emit_insn (gen_extenddfif2 (dest, df_tmp));
  else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
    emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
  else
    gcc_unreachable ();

  DONE;
})

;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>)
;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
;; vector short or vector unsigned short.
(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
  [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
	(float:FL_CONV
	 (vec_select:<VSX_EXTRACT_I:VS_scalar>
	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
	  (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
   (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 3)
		   (vec_select:<VSX_EXTRACT_I:VS_scalar>
		    (match_dup 1)
		    (parallel [(match_dup 2)])))
	      (clobber (scratch:SI))])
   (set (match_dup 4)
	(sign_extend:DI (match_dup 3)))
   (set (match_dup 0)
	(float:<FL_CONV:MODE> (match_dup 4)))]
{
  operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
})

(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
  [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
	(unsigned_float:FL_CONV
	 (vec_select:<VSX_EXTRACT_I:VS_scalar>
	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
	  (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
   (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 3)
		   (vec_select:<VSX_EXTRACT_I:VS_scalar>
		    (match_dup 1)
		    (parallel [(match_dup 2)])))
	      (clobber (scratch:SI))])
   (set (match_dup 0)
	(float:<FL_CONV:MODE> (match_dup 4)))]
{
  operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
})

;; V4SI/V8HI/V16QI set operation on ISA 3.0
(define_insn "vsx_set_<mode>_p9"
  [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
	(unspec:VSX_EXTRACT_I
	 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
	  (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
	  (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
	 UNSPEC_VSX_SET))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
{
  int ele = INTVAL (operands[3]);
  int nunits = GET_MODE_NUNITS (<MODE>mode);

  if (!BYTES_BIG_ENDIAN)
    ele = nunits - 1 - ele;

  operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
  if (<MODE>mode == V4SImode)
    return "xxinsertw %x0,%x2,%3";
  else
    return "vinsert<wd> %0,%2,%3";
}
  [(set_attr "type" "vecperm")])
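
;; Usage note: a C sketch of the vec_insert form that maps onto
;; vsx_set_<mode>_p9 (illustrative only; assumes -mcpu=power9 -m64):
;;
;;	#include <altivec.h>
;;
;;	vector int
;;	set_elem (vector int v, int x)
;;	{
;;	  return vec_insert (x, v, 3);	/* xxinsertw for V4SI */
;;	}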

(define_insn_and_split "vsx_set_v4sf_p9"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (match_operand:SF 2 "gpc_reg_operand" "ww")
	  (match_operand:QI 3 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))
   (clobber (match_scratch:SI 4 "=&wJwK"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
  "#"
  "&& reload_completed"
  [(set (match_dup 5)
	(unspec:V4SF [(match_dup 2)]
		     UNSPEC_VSX_CVDPSPN))
   (parallel [(set (match_dup 4)
		   (vec_select:SI (match_dup 6)
				  (parallel [(match_dup 7)])))
	      (clobber (scratch:SI))])
   (set (match_dup 8)
	(unspec:V4SI [(match_dup 8)
		      (match_dup 4)
		      (match_dup 3)]
		     UNSPEC_VSX_SET))]
{
  unsigned int tmp_regno = reg_or_subregno (operands[4]);

  operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
  operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
  operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 1 : 2);
  operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "12")])

;; Special case setting 0.0f to a V4SF element
(define_insn_and_split "*vsx_set_v4sf_p9_zero"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (match_operand:SF 2 "zero_fp_constant" "j")
	  (match_operand:QI 3 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))
   (clobber (match_scratch:SI 4 "=&wJwK"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
  "#"
  "&& reload_completed"
  [(set (match_dup 4)
	(const_int 0))
   (set (match_dup 5)
	(unspec:V4SI [(match_dup 5)
		      (match_dup 4)
		      (match_dup 3)]
		     UNSPEC_VSX_SET))]
{
  operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])

;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
;; that is in the default scalar position (1 for big endian, 2 for little
;; endian).  We just need to do an xxinsertw since the element is in the
;; correct location.

(define_insn "*vsx_insert_extract_v4sf_p9"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
			 (parallel
			  [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
	  (match_operand:QI 4 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
   && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
{
  int ele = INTVAL (operands[4]);

  if (!BYTES_BIG_ENDIAN)
    ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;

  operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
  return "xxinsertw %x0,%x2,%4";
}
  [(set_attr "type" "vecperm")])

;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
;; that is in the default scalar position (1 for big endian, 2 for little
;; endian).  Convert the insert/extract to int and avoid doing the conversion.

(define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
			 (parallel
			  [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
	  (match_operand:QI 4 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))
   (clobber (match_scratch:SI 5 "=&wJwK"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
   && TARGET_P9_VECTOR && TARGET_POWERPC64
   && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
  "#"
  "&& 1"
  [(parallel [(set (match_dup 5)
		   (vec_select:SI (match_dup 6)
				  (parallel [(match_dup 3)])))
	      (clobber (scratch:SI))])
   (set (match_dup 7)
	(unspec:V4SI [(match_dup 8)
		      (match_dup 5)
		      (match_dup 4)]
		     UNSPEC_VSX_SET))]
{
  if (GET_CODE (operands[5]) == SCRATCH)
    operands[5] = gen_reg_rtx (SImode);

  operands[6] = gen_lowpart (V4SImode, operands[2]);
  operands[7] = gen_lowpart (V4SImode, operands[0]);
  operands[8] = gen_lowpart (V4SImode, operands[1]);
}
  [(set_attr "type" "vecperm")])

;; Expanders for builtins
(define_expand "vsx_mergel_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:VSX_D 2 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
  rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
  emit_insn (gen_rtx_SET (operands[0], x));
  DONE;
})

(define_expand "vsx_mergeh_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:VSX_D 2 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
  rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
  emit_insn (gen_rtx_SET (operands[0], x));
  DONE;
})
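
;; Usage note: a C sketch of the merge builtins these expanders implement
;; (illustrative only):
;;
;;	#include <altivec.h>
;;
;;	vector double
;;	merge_high (vector double a, vector double b)
;;	{
;;	  return vec_mergeh (a, b);	/* element 0 of a, element 0 of b */
;;	}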

;; V2DF/V2DI splat
;; We separate the register splat insn from the memory splat insn to force the
;; register allocator to generate the indexed form of the SPLAT when it is
;; given an offsettable memory reference.  Otherwise, if the register and
;; memory insns were combined into a single insn, the register allocator will
;; load the value into a register, and then do a double word permute.
(define_expand "vsx_splat_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand")
	(vec_duplicate:VSX_D
	 (match_operand:<VS_scalar> 1 "input_operand")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx op1 = operands[1];
  if (MEM_P (op1))
    operands[1] = rs6000_address_for_fpconvert (op1);
  else if (!REG_P (op1))
    op1 = force_reg (<VSX_D:VS_scalar>mode, op1);
})

(define_insn "vsx_splat_<mode>_reg"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we")
	(vec_duplicate:VSX_D
	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   xxpermdi %x0,%x1,%x1,0
   mtvsrdd %x0,%1,%1"
  [(set_attr "type" "vecperm")])

(define_insn "vsx_splat_<VSX_D:mode>_mem"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>")
	(vec_duplicate:VSX_D
	 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "lxvdsx %x0,%y1"
  [(set_attr "type" "vecload")])
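
;; Usage note: a C sketch of a V2DF splat from memory, which is expected
;; to use the LXVDSX form above (illustrative only):
;;
;;	#include <altivec.h>
;;
;;	vector double
;;	splat_mem (double *p)
;;	{
;;	  return vec_splats (*p);
;;	}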

;; V4SI splat support
(define_insn "vsx_splat_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
	(vec_duplicate:V4SI
	 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
  "TARGET_P9_VECTOR"
  "@
   mtvsrws %x0,%1
   lxvwsx %x0,%y1"
  [(set_attr "type" "vecperm,vecload")])

;; SImode is not currently allowed in vector registers.  This pattern
;; allows us to use direct move to get the value in a vector register
;; so that we can use XXSPLTW.
(define_insn "vsx_splat_v4si_di"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
	(vec_duplicate:V4SI
	 (truncate:SI
	  (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "@
   xxspltw %x0,%x1,1
   mtvsrws %x0,%1"
  [(set_attr "type" "vecperm")])

;; V4SF splat (ISA 3.0)
(define_insn_and_split "vsx_splat_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
	(vec_duplicate:V4SF
	 (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
  "TARGET_P9_VECTOR"
  "@
   lxvwsx %x0,%y1
   #
   mtvsrws %x0,%1"
  "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
  [(set (match_dup 0)
	(unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
   (set (match_dup 0)
	(unspec:V4SF [(match_dup 0)
		      (const_int 0)] UNSPEC_VSX_XXSPLTW))]
  ""
  [(set_attr "type" "vecload,vecperm,mftgpr")
   (set_attr "length" "4,8,4")])

;; V4SF/V4SI splat from a vector element
(define_insn "vsx_xxspltw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
	(vec_duplicate:VSX_W
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
	  (parallel
	   [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (!BYTES_BIG_ENDIAN)
    operands[2] = GEN_INT (3 - INTVAL (operands[2]));

  return "xxspltw %x0,%x1,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn "vsx_xxspltw_<mode>_direct"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
	(unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
		       (match_operand:QI 2 "u5bit_cint_operand" "i")]
		      UNSPEC_VSX_XXSPLTW))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxspltw %x0,%x1,%2"
  [(set_attr "type" "vecperm")])

;; V16QI/V8HI splat support on ISA 2.07
(define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
  [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
	(vec_duplicate:VSX_SPLAT_I
	 (truncate:<VS_scalar>
	  (match_operand:DI 1 "altivec_register_operand" "v"))))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
  [(set_attr "type" "vecperm")])

;; V2DF/V2DI splat for use by vec_splat builtin
(define_insn "vsx_xxspltd_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
	(unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
		       (match_operand:QI 2 "u5bit_cint_operand" "i")]
		      UNSPEC_VSX_XXSPLTD))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
      || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
    return "xxpermdi %x0,%x1,%x1,0";
  else
    return "xxpermdi %x0,%x1,%x1,3";
}
  [(set_attr "type" "vecperm")])
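
;; Usage note: a C sketch of vec_splat on a 64-bit element vector, which
;; maps onto the XXPERMDI forms above (illustrative only):
;;
;;	#include <altivec.h>
;;
;;	vector long long
;;	splat_elem1 (vector long long v)
;;	{
;;	  return vec_splat (v, 1);
;;	}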

;; V4SF/V4SI interleave
(define_insn "vsx_xxmrghw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
	(vec_select:VSX_W
	 (vec_concat:<VS_double>
	  (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
	  (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
	 (parallel [(const_int 0) (const_int 4)
		    (const_int 1) (const_int 5)])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxmrghw %x0,%x1,%x2";
  else
    return "xxmrglw %x0,%x2,%x1";
}
  [(set_attr "type" "vecperm")])

(define_insn "vsx_xxmrglw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
	(vec_select:VSX_W
	 (vec_concat:<VS_double>
	  (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
	  (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
	 (parallel [(const_int 2) (const_int 6)
		    (const_int 3) (const_int 7)])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxmrglw %x0,%x1,%x2";
  else
    return "xxmrghw %x0,%x2,%x1";
}
  [(set_attr "type" "vecperm")])

;; Shift left double by word immediate
(define_insn "vsx_xxsldwi_<mode>"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
	(unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
		       (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
		       (match_operand:QI 3 "u5bit_cint_operand" "i")]
		      UNSPEC_VSX_SLDWI))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsldwi %x0,%x1,%x2,%3"
  [(set_attr "type" "vecperm")])
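
;; Usage note: GCC also exposes this instruction at the source level; a C
;; sketch using the documented vec_xxsldwi built-in (illustrative only):
;;
;;	#include <altivec.h>
;;
;;	vector float
;;	shift_left_2_words (vector float a, vector float b)
;;	{
;;	  return vec_xxsldwi (a, b, 2);
;;	}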

;; Vector reduction insns and splitters

(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
  [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
	(VEC_reduc:V2DF
	 (vec_concat:V2DF
	  (vec_select:DF
	   (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
	   (parallel [(const_int 1)]))
	  (vec_select:DF
	   (match_dup 1)
	   (parallel [(const_int 0)])))
	 (match_dup 1)))
   (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
	     ? gen_reg_rtx (V2DFmode)
	     : operands[2];
  emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "veccomplex")])

(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
  [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
	(VEC_reduc:V4SF
	 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
	 (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
   (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
   (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp2, tmp3, tmp4;

  if (can_create_pseudo_p ())
    {
      tmp2 = gen_reg_rtx (V4SFmode);
      tmp3 = gen_reg_rtx (V4SFmode);
      tmp4 = gen_reg_rtx (V4SFmode);
    }
  else
    {
      tmp2 = operands[2];
      tmp3 = operands[3];
      tmp4 = tmp2;
    }

  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
  DONE;
}
  [(set_attr "length" "16")
   (set_attr "type" "veccomplex")])
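
;; For reference, a scalar C model of what the two xxsldwi/op steps above
;; compute for the plus reduction, modulo element ordering (illustrative
;; only):
;;
;;	float
;;	reduc_plus (const float v[4])
;;	{
;;	  return (v[0] + v[2]) + (v[1] + v[3]);
;;	}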

;; Combiner patterns with the vector reduction patterns that know we can get
;; to the top element of the V2DF array without doing an extract.

(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
  [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
	(vec_select:DF
	 (VEC_reduc:V2DF
	  (vec_concat:V2DF
	   (vec_select:DF
	    (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
	    (parallel [(const_int 1)]))
	   (vec_select:DF
	    (match_dup 1)
	    (parallel [(const_int 0)])))
	  (match_dup 1))
	 (parallel [(const_int 1)])))
   (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx hi = gen_highpart (DFmode, operands[1]);
  rtx lo = (GET_CODE (operands[2]) == SCRATCH)
	    ? gen_reg_rtx (DFmode)
	    : operands[2];

  emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
  emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "veccomplex")])

(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
  [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
	(vec_select:SF
	 (VEC_reduc:V4SF
	  (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
	  (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
	 (parallel [(const_int 3)])))
   (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
   (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
   (clobber (match_scratch:V4SF 4 "=0,0"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp2, tmp3, tmp4, tmp5;

  if (can_create_pseudo_p ())
    {
      tmp2 = gen_reg_rtx (V4SFmode);
      tmp3 = gen_reg_rtx (V4SFmode);
      tmp4 = gen_reg_rtx (V4SFmode);
      tmp5 = gen_reg_rtx (V4SFmode);
    }
  else
    {
      tmp2 = operands[2];
      tmp3 = operands[3];
      tmp4 = tmp2;
      tmp5 = operands[4];
    }

  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
  DONE;
}
  [(set_attr "length" "20")
   (set_attr "type" "veccomplex")])

;; Power8 Vector fusion.  The fused ops must be physically adjacent.
(define_peephole
  [(set (match_operand:P 0 "base_reg_operand")
	(match_operand:P 1 "short_cint_operand"))
   (set (match_operand:VSX_M 2 "vsx_register_operand")
	(mem:VSX_M (plus:P (match_dup 0)
			   (match_operand:P 3 "int_reg_operand"))))]
  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
  "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
  [(set_attr "length" "8")
   (set_attr "type" "vecload")])

(define_peephole
  [(set (match_operand:P 0 "base_reg_operand")
	(match_operand:P 1 "short_cint_operand"))
   (set (match_operand:VSX_M 2 "vsx_register_operand")
	(mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
			   (match_dup 0))))]
  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
  "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
  [(set_attr "length" "8")
   (set_attr "type" "vecload")])

;; ISA 3.0 vector extend sign support

(define_insn "vsx_sign_extend_qi_<mode>"
  [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
	(unspec:VSINT_84
	 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
	 UNSPEC_VSX_SIGN_EXTEND))]
  "TARGET_P9_VECTOR"
  "vextsb2<wd> %0,%1"
  [(set_attr "type" "vecexts")])

(define_insn "vsx_sign_extend_hi_<mode>"
  [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
	(unspec:VSINT_84
	 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
	 UNSPEC_VSX_SIGN_EXTEND))]
  "TARGET_P9_VECTOR"
  "vextsh2<wd> %0,%1"
  [(set_attr "type" "vecexts")])

(define_insn "*vsx_sign_extend_si_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
	(unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
		     UNSPEC_VSX_SIGN_EXTEND))]
  "TARGET_P9_VECTOR"
  "vextsw2d %0,%1"
  [(set_attr "type" "vecexts")])

;; ISA 3.0 Binary Floating-Point Support

;; VSX Scalar Extract Exponent Quad-Precision
(define_insn "xsxexpqp_<mode>"
  [(set (match_operand:DI 0 "altivec_register_operand" "=v")
	(unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
		   UNSPEC_VSX_SXEXPDP))]
  "TARGET_P9_VECTOR"
  "xsxexpqp %0,%1"
  [(set_attr "type" "vecmove")])

;; VSX Scalar Extract Exponent Double-Precision
(define_insn "xsxexpdp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_SXEXPDP))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsxexpdp %0,%x1"
  [(set_attr "type" "integer")])

;; VSX Scalar Extract Significand Quad-Precision
(define_insn "xsxsigqp_<mode>"
  [(set (match_operand:TI 0 "altivec_register_operand" "=v")
	(unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
		   UNSPEC_VSX_SXSIG))]
  "TARGET_P9_VECTOR"
  "xsxsigqp %0,%1"
  [(set_attr "type" "vecmove")])

;; VSX Scalar Extract Significand Double-Precision
(define_insn "xsxsigdp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_SXSIG))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsxsigdp %0,%x1"
  [(set_attr "type" "integer")])
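
;; Usage note: a C sketch of the extraction built-ins these patterns
;; implement (illustrative only; requires -mcpu=power9 -m64):
;;
;;	#include <altivec.h>
;;
;;	unsigned int
;;	get_exp (double d)
;;	{
;;	  return scalar_extract_exp (d);	/* xsxexpdp */
;;	}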

;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
(define_insn "xsiexpqpf_<mode>"
  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
	(unspec:IEEE128
	 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	  (match_operand:DI 2 "altivec_register_operand" "v")]
	 UNSPEC_VSX_SIEXPQP))]
  "TARGET_P9_VECTOR"
  "xsiexpqp %0,%1,%2"
  [(set_attr "type" "vecmove")])

;; VSX Scalar Insert Exponent Quad-Precision
(define_insn "xsiexpqp_<mode>"
  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
	(unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
			 (match_operand:DI 2 "altivec_register_operand" "v")]
			UNSPEC_VSX_SIEXPQP))]
  "TARGET_P9_VECTOR"
  "xsiexpqp %0,%1,%2"
  [(set_attr "type" "vecmove")])

;; VSX Scalar Insert Exponent Double-Precision
(define_insn "xsiexpdp"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
	(unspec:DF [(match_operand:DI 1 "register_operand" "r")
		    (match_operand:DI 2 "register_operand" "r")]
		   UNSPEC_VSX_SIEXPDP))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsiexpdp %x0,%1,%2"
  [(set_attr "type" "fpsimple")])

;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
(define_insn "xsiexpdpf"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
	(unspec:DF [(match_operand:DF 1 "register_operand" "r")
		    (match_operand:DI 2 "register_operand" "r")]
		   UNSPEC_VSX_SIEXPDP))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsiexpdp %x0,%1,%2"
  [(set_attr "type" "fpsimple")])

;; VSX Scalar Compare Exponents Double-Precision
(define_expand "xscmpexpdp_<code>"
  [(set (match_dup 3)
	(compare:CCFP
	 (unspec:DF
	  [(match_operand:DF 1 "vsx_register_operand" "wa")
	   (match_operand:DF 2 "vsx_register_operand" "wa")]
	  UNSPEC_VSX_SCMPEXPDP)
	 (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(CMP_TEST:SI (match_dup 3)
		     (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

(define_insn "*xscmpexpdp"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
	(compare:CCFP
	 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
		     (match_operand:DF 2 "vsx_register_operand" "wa")]
		    UNSPEC_VSX_SCMPEXPDP)
	 (match_operand:SI 3 "zero_constant" "j")))]
  "TARGET_P9_VECTOR"
  "xscmpexpdp %0,%x1,%x2"
  [(set_attr "type" "fpcompare")])

;; VSX Scalar Test Data Class Quad-Precision
;;  (Expansion for scalar_test_data_class (__ieee128, int))
;;  (Has side effect of setting the lt bit if operand 1 is negative,
;;   setting the eq bit if any of the conditions tested by operand 2
;;   are satisfied, and clearing the gt and unordered bits to zero.)
(define_expand "xststdcqp_<mode>"
  [(set (match_dup 3)
	(compare:CCFP
	 (unspec:IEEE128
	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(eq:SI (match_dup 3)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

;; VSX Scalar Test Data Class Double- and Single-Precision
;;  (The lt bit is set if operand 1 is negative.  The eq bit is set
;;   if any of the conditions tested by operand 2 are satisfied.
;;   The gt and unordered bits are cleared to zero.)
(define_expand "xststdc<Fvsx>"
  [(set (match_dup 3)
	(compare:CCFP
	 (unspec:SFDF
	  [(match_operand:SFDF 1 "vsx_register_operand" "wa")
	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (match_dup 4)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(eq:SI (match_dup 3)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[3] = gen_reg_rtx (CCFPmode);
  operands[4] = CONST0_RTX (SImode);
})
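
;; Usage note: a C sketch of the test-data-class built-in (illustrative
;; only; the 0x70 condition mask selects the NaN and +/-Inf bits and is
;; just an example value):
;;
;;	#include <altivec.h>
;;
;;	_Bool
;;	is_nan_or_inf (double d)
;;	{
;;	  return scalar_test_data_class (d, 0x70);
;;	}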

;; The VSX Scalar Test Negative Quad-Precision
(define_expand "xststdcnegqp_<mode>"
  [(set (match_dup 2)
	(compare:CCFP
	 (unspec:IEEE128
	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	   (const_int 0)]
	  UNSPEC_VSX_STSTDC)
	 (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(lt:SI (match_dup 2)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})

;; The VSX Scalar Test Negative Double- and Single-Precision
(define_expand "xststdcneg<Fvsx>"
  [(set (match_dup 2)
	(compare:CCFP
	 (unspec:SFDF
	  [(match_operand:SFDF 1 "vsx_register_operand" "wa")
	   (const_int 0)]
	  UNSPEC_VSX_STSTDC)
	 (match_dup 3)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(lt:SI (match_dup 2)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[2] = gen_reg_rtx (CCFPmode);
  operands[3] = CONST0_RTX (SImode);
})

(define_insn "*xststdcqp_<mode>"
  [(set (match_operand:CCFP 0 "" "=y")
	(compare:CCFP
	 (unspec:IEEE128
	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (const_int 0)))]
  "TARGET_P9_VECTOR"
  "xststdcqp %0,%1,%2"
  [(set_attr "type" "fpcompare")])

(define_insn "*xststdc<Fvsx>"
  [(set (match_operand:CCFP 0 "" "=y")
	(compare:CCFP
	 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
		       (match_operand:SI 2 "u7bit_cint_operand" "n")]
		      UNSPEC_VSX_STSTDC)
	 (match_operand:SI 3 "zero_constant" "j")))]
  "TARGET_P9_VECTOR"
  "xststdc<Fvsx> %0,%x1,%2"
  [(set_attr "type" "fpcompare")])

;; VSX Vector Extract Exponent Double and Single Precision
(define_insn "xvxexp<VSs>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
	 UNSPEC_VSX_VXEXP))]
  "TARGET_P9_VECTOR"
  "xvxexp<VSs> %x0,%x1"
  [(set_attr "type" "vecsimple")])

;; VSX Vector Extract Significand Double and Single Precision
(define_insn "xvxsig<VSs>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
	 UNSPEC_VSX_VXSIG))]
  "TARGET_P9_VECTOR"
  "xvxsig<VSs> %x0,%x1"
  [(set_attr "type" "vecsimple")])

;; VSX Vector Insert Exponent Double and Single Precision
(define_insn "xviexp<VSs>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
	  (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
	 UNSPEC_VSX_VIEXP))]
  "TARGET_P9_VECTOR"
  "xviexp<VSs> %x0,%x1,%x2"
  [(set_attr "type" "vecsimple")])

;; VSX Vector Test Data Class Double and Single Precision
;; The corresponding elements of the result vector are all ones
;; if any of the conditions tested by operand 2 are satisfied.
(define_insn "xvtstdc<VSs>"
  [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
	(unspec:<VSI>
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
	  (match_operand:SI 2 "u7bit_cint_operand" "n")]
	 UNSPEC_VSX_VTSTDC))]
  "TARGET_P9_VECTOR"
  "xvtstdc<VSs> %x0,%x1,%2"
  [(set_attr "type" "vecsimple")])

;; ISA 3.0 String Operations Support

;; Compare vectors producing a vector result and a predicate, setting CR6
;; to indicate a combined status.  This pattern matches v16qi, v8hi, and
;; v4si modes.  It does not match v2df, v4sf, or v2di modes.  There's no
;; need to match v4sf, v2df, or v2di modes because those are expanded
;; to use Power8 instructions.
(define_insn "*vsx_ne_<mode>_p"
  [(set (reg:CC CR6_REGNO)
	(unspec:CC
	 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
		 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
	 UNSPEC_PREDICATE))
   (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
	(ne:VSX_EXTRACT_I (match_dup 1)
			  (match_dup 2)))]
  "TARGET_P9_VECTOR"
  "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
  [(set_attr "type" "vecsimple")])

(define_insn "*vector_nez_<mode>_p"
  [(set (reg:CC CR6_REGNO)
	(unspec:CC [(unspec:VI
		     [(match_operand:VI 1 "gpc_reg_operand" "v")
		      (match_operand:VI 2 "gpc_reg_operand" "v")]
		     UNSPEC_NEZ_P)]
		   UNSPEC_PREDICATE))
   (set (match_operand:VI 0 "gpc_reg_operand" "=v")
	(unspec:VI [(match_dup 1)
		    (match_dup 2)]
		   UNSPEC_NEZ_P))]
  "TARGET_P9_VECTOR"
  "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
  [(set_attr "type" "vecsimple")])
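
;; Usage note: a C sketch of a predicate built-in that can use the
;; CR6-setting compare above (illustrative only):
;;
;;	#include <altivec.h>
;;
;;	int
;;	all_different (vector int a, vector int b)
;;	{
;;	  return vec_all_ne (a, b);
;;	}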

;; Return first position of match between vectors using natural order
;; for both LE and BE execution modes.
(define_expand "first_match_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
	      UNSPEC_VSX_FIRST_MATCH_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;

  rtx cmp_result = gen_reg_rtx (<MODE>mode);
  rtx not_result = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
					     operands[2]));
  emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));

  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
      else
	emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
      else
	emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})
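
;; Usage note: a C sketch of the corresponding built-in (illustrative
;; only; requires -mcpu=power9):
;;
;;	#include <altivec.h>
;;
;;	unsigned int
;;	first_match (vector unsigned char a, vector unsigned char b)
;;	{
;;	  return vec_first_match_index (a, b);
;;	}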

;; Return first position of match between vectors or end of string (EOS) using
;; natural element order for both LE and BE execution modes.
(define_expand "first_match_or_eos_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
	      UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx and_result = gen_reg_rtx (<MODE>mode);
  rtx result = gen_reg_rtx (<MODE>mode);
  rtx vzero = gen_reg_rtx (<MODE>mode);

  /* Vector with zeros in elements that correspond to zeros in operands.  */
  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));

  /* Vector with ones in elements that do not match.  */
  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
					     operands[2]));

  /* Create vector with ones in elements where there was a zero in one of
     the source elements or the elements that match.  */
  emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
      else
	emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (tmp, result));
      else
	emit_insn (gen_vclzlsbb_<mode> (tmp, result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})

;; Return first position of mismatch between vectors using natural
;; element order for both LE and BE execution modes.
(define_expand "first_mismatch_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
	      UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmp_result = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
					    operands[2]));
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
      else
	emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
      else
	emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})

;; Return first position of mismatch between vectors or end of string (EOS)
;; using natural element order for both LE and BE execution modes.
(define_expand "first_mismatch_or_eos_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
	      UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx and_result = gen_reg_rtx (<MODE>mode);
  rtx result = gen_reg_rtx (<MODE>mode);
  rtx vzero = gen_reg_rtx (<MODE>mode);

  /* Vector with zeros in elements that correspond to zeros in operands.  */
  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));

  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));

  /* Vector with ones in elements that match.  */
  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
					     operands[2]));
  emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));

  /* Create vector with ones in elements where there was a zero in one of
     the source elements or the elements did not match.  */
  emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
      else
	emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (tmp, result));
      else
	emit_insn (gen_vclzlsbb_<mode> (tmp, result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})

;; Load VSX Vector with Length
(define_expand "lxvl"
  [(set (match_dup 3)
	(ashift:DI (match_operand:DI 2 "register_operand")
		   (const_int 56)))
   (set (match_operand:V16QI 0 "vsx_register_operand")
	(unspec:V16QI
	 [(match_operand:DI 1 "gpc_reg_operand")
	  (mem:V16QI (match_dup 1))
	  (match_dup 3)]
	 UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  operands[3] = gen_reg_rtx (DImode);
})

(define_insn "*lxvl"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI
	 [(match_operand:DI 1 "gpc_reg_operand" "b")
	  (mem:V16QI (match_dup 1))
	  (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "lxvl %x0,%1,%2"
  [(set_attr "type" "vecload")])

(define_insn "lxvll"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
		       (mem:V16QI (match_dup 1))
		       (match_operand:DI 2 "register_operand" "r")]
		      UNSPEC_LXVLL))]
  "TARGET_P9_VECTOR"
  "lxvll %x0,%1,%2"
  [(set_attr "type" "vecload")])
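
;; Usage note: a C sketch of the length-controlled load built-in that
;; expands through lxvl (illustrative only; requires -mcpu=power9 -m64):
;;
;;	#include <altivec.h>
;;	#include <stddef.h>
;;
;;	vector unsigned char
;;	load_n (unsigned char *p, size_t n)	/* loads only n bytes */
;;	{
;;	  return vec_xl_len (p, n);
;;	}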

;; Expand for builtin xl_len_r
(define_expand "xl_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:DI 2 "register_operand")]
  ""
{
  rtx shift_mask = gen_reg_rtx (V16QImode);
  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (DImode);

  emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
  emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
  emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
					  shift_mask));
  DONE;
})

(define_insn "stxvll"
  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
	(unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
		       (mem:V16QI (match_dup 1))
		       (match_operand:DI 2 "register_operand" "r")]
		      UNSPEC_STXVLL))]
  "TARGET_P9_VECTOR"
  "stxvll %x0,%1,%2"
  [(set_attr "type" "vecstore")])

;; Store VSX Vector with Length
(define_expand "stxvl"
  [(set (match_dup 3)
	(ashift:DI (match_operand:DI 2 "register_operand")
		   (const_int 56)))
   (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
	(unspec:V16QI
	 [(match_operand:V16QI 0 "vsx_register_operand")
	  (mem:V16QI (match_dup 1))
	  (match_dup 3)]
	 UNSPEC_STXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  operands[3] = gen_reg_rtx (DImode);
})

(define_insn "*stxvl"
  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
	(unspec:V16QI
	 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
	  (mem:V16QI (match_dup 1))
	  (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_STXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "stxvl %x0,%1,%2"
  [(set_attr "type" "vecstore")])

;; Expand for builtin xst_len_r
(define_expand "xst_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
   (match_operand:DI 1 "register_operand" "b")
   (match_operand:DI 2 "register_operand" "r")]
  ""
{
  rtx shift_mask = gen_reg_rtx (V16QImode);
  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (DImode);

  emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
  emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
					  shift_mask));
  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
  emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
  DONE;
})

;; Vector Compare Not Equal Byte (specified/not+eq:)
(define_insn "vcmpneb"
  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
	(not:V16QI
	 (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
		   (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpneb %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Byte
(define_insn "vcmpnezb"
  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
	(unspec:V16QI
	 [(match_operand:V16QI 1 "altivec_register_operand" "v")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VCMPNEZB))]
  "TARGET_P9_VECTOR"
  "vcmpnezb %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal Half Word (specified/not+eq:)
(define_insn "vcmpneh"
  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
	(not:V8HI
	 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
		  (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpneh %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Half Word
(define_insn "vcmpnezh"
  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
	(unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
		      (match_operand:V8HI 2 "altivec_register_operand" "v")]
		     UNSPEC_VCMPNEZH))]
  "TARGET_P9_VECTOR"
  "vcmpnezh %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal Word (specified/not+eq:)
(define_insn "vcmpnew"
  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
	(not:V4SI
	 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
		  (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpnew %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Word
(define_insn "vcmpnezw"
  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
	(unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
		      (match_operand:V4SI 2 "altivec_register_operand" "v")]
		     UNSPEC_VCMPNEZW))]
  "TARGET_P9_VECTOR"
  "vcmpnezw %0,%1,%2"
  [(set_attr "type" "vecsimple")])
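
;; Usage note: a C sketch of the element-wise compare-not-equal built-in
;; (illustrative only):
;;
;;	#include <altivec.h>
;;
;;	vector bool int
;;	cmpne (vector int a, vector int b)
;;	{
;;	  return vec_cmpne (a, b);	/* vcmpnew */
;;	}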

;; Vector Count Leading Zero Least-Significant Bits Byte
(define_insn "vclzlsbb_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
	 UNSPEC_VCLZLSBB))]
  "TARGET_P9_VECTOR"
  "vclzlsbb %0,%1"
  [(set_attr "type" "vecsimple")])

;; Vector Count Trailing Zero Least-Significant Bits Byte
(define_insn "vctzlsbb_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
	 UNSPEC_VCTZLSBB))]
  "TARGET_P9_VECTOR"
  "vctzlsbb %0,%1"
  [(set_attr "type" "vecsimple")])
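
;; Usage note: a C sketch of the count built-ins these insns implement
;; (illustrative only; requires -mcpu=power9):
;;
;;	#include <altivec.h>
;;
;;	int
;;	count_lead_lsb_zeros (vector unsigned char v)
;;	{
;;	  return vec_cntlz_lsbb (v);	/* vclzlsbb */
;;	}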

;; Vector Extract Unsigned Byte Left-Indexed
(define_insn "vextublx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUBLX))]
  "TARGET_P9_VECTOR"
  "vextublx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Byte Right-Indexed
(define_insn "vextubrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUBRX))]
  "TARGET_P9_VECTOR"
  "vextubrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Left-Indexed
(define_insn "vextuhlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUHLX))]
  "TARGET_P9_VECTOR"
  "vextuhlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Right-Indexed
(define_insn "vextuhrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUHRX))]
  "TARGET_P9_VECTOR"
  "vextuhrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Left-Indexed
(define_insn "vextuwlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUWLX))]
  "TARGET_P9_VECTOR"
  "vextuwlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Right-Indexed
(define_insn "vextuwrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUWRX))]
  "TARGET_P9_VECTOR"
  "vextuwrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector insert/extract word at arbitrary byte values.  Note, the little
;; endian version needs to adjust the byte number, and the V4SI element in
;; vinsert4b.
(define_insn "extract4b"
  [(set (match_operand:V2DI 0 "vsx_register_operand")
	(unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
		      (match_operand:QI 2 "const_0_to_12_operand" "n")]
		     UNSPEC_XXEXTRACTUW))]
  "TARGET_P9_VECTOR"
{
  if (!BYTES_BIG_ENDIAN)
    operands[2] = GEN_INT (12 - INTVAL (operands[2]));

  return "xxextractuw %x0,%x1,%2";
})

(define_expand "insert4b"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
		       (match_operand:V16QI 2 "vsx_register_operand")
		       (match_operand:QI 3 "const_0_to_12_operand")]
		      UNSPEC_XXINSERTW))]
  "TARGET_P9_VECTOR"
{
  if (!BYTES_BIG_ENDIAN)
    {
      rtx op1 = operands[1];
      rtx v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
      operands[1] = v4si_tmp;
      operands[3] = GEN_INT (12 - INTVAL (operands[3]));
    }
})

(define_insn "*insert4b_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
		       (match_operand:V16QI 2 "vsx_register_operand" "0")
		       (match_operand:QI 3 "const_0_to_12_operand" "n")]
		      UNSPEC_XXINSERTW))]
  "TARGET_P9_VECTOR"
  "xxinsertw %x0,%x1,%3"
  [(set_attr "type" "vecperm")])

;; Generate vector extract four float 32 values from left four elements
;; of eight element vector of float 16 values.
(define_expand "vextract_fp_from_shorth"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
		     UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
  "TARGET_P9_VECTOR"
{
  int i;
  int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
  int vals_be[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};

  rtx rvals[16];
  rtx mask = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (V16QImode);
  rtvec v;

  for (i = 0; i < 16; i++)
    if (!BYTES_BIG_ENDIAN)
      rvals[i] = GEN_INT (vals_le[i]);
    else
      rvals[i] = GEN_INT (vals_be[i]);

  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
     src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
     conversion instruction.  */
  v = gen_rtvec_v (16, rvals);
  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
					  operands[1], mask));
  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
  DONE;
})

;; Generate vector extract four float 32 values from right four elements
;; of eight element vector of float 16 values.
(define_expand "vextract_fp_from_shortl"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
		     UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
  "TARGET_P9_VECTOR"
{
  int i;
  int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
  int vals_be[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};

  rtx rvals[16];
  rtx mask = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (V16QImode);
  rtvec v;

  for (i = 0; i < 16; i++)
    if (!BYTES_BIG_ENDIAN)
      rvals[i] = GEN_INT (vals_le[i]);
    else
      rvals[i] = GEN_INT (vals_be[i]);

  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
     src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
     conversion instruction.  */
  v = gen_rtvec_v (16, rvals);
  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
					  operands[1], mask));
  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
  DONE;
})

;; Support for ISA 3.0 vector byte reverse

;; Swap all bytes within a vector
(define_insn "p9_xxbrq_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
	(bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrq %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrq_v16qi"
  [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V1TImode);
  rtx op1 = gen_lowpart (V1TImode, operands[1]);
  emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
  DONE;
})

;; Swap all bytes in each 64-bit element
(define_insn "p9_xxbrd_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrd %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrd_v2df"
  [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V2DImode);
  rtx op1 = gen_lowpart (V2DImode, operands[1]);
  emit_insn (gen_p9_xxbrd_v2di (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
  DONE;
})

;; Swap all bytes in each 32-bit element
(define_insn "p9_xxbrw_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
	(bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrw %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrw_v4sf"
  [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V4SImode);
  rtx op1 = gen_lowpart (V4SImode, operands[1]);
  emit_insn (gen_p9_xxbrw_v4si (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
  DONE;
})

;; Swap all bytes in each element of vector
(define_expand "revb_<mode>"
  [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
   (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
  ""
{
  if (TARGET_P9_VECTOR)
    emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
  else
    {
      /* Want to have the elements in reverse order relative
	 to the endian mode in use, i.e. in LE mode, put elements
	 in BE order.  */
      rtx sel = swap_endian_selector_for_mode (<MODE>mode);
      emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
					   operands[1], sel));
    }

  DONE;
})

;; Reversing bytes in vector char is just a NOP.
(define_expand "revb_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
	(bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
  ""
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})

;; Swap all bytes in each 16-bit element
(define_insn "p9_xxbrh_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
	(bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrh %x0,%x1"
  [(set_attr "type" "vecperm")])
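
;; Usage note: a C sketch of the byte-reverse built-in these patterns
;; implement (illustrative only; -mcpu=power9 enables the XXBR* forms):
;;
;;	#include <altivec.h>
;;
;;	vector unsigned int
;;	byte_swap_each_word (vector unsigned int v)
;;	{
;;	  return vec_revb (v);	/* xxbrw */
;;	}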

;; Operand numbers for the following peephole2
(define_constants
  [(SFBOOL_TMP_GPR	 0)		;; GPR temporary
   (SFBOOL_TMP_VSX	 1)		;; vector temporary
   (SFBOOL_MFVSR_D	 2)		;; move to gpr dest
   (SFBOOL_MFVSR_A	 3)		;; move to gpr src
   (SFBOOL_BOOL_D	 4)		;; and/ior/xor dest
   (SFBOOL_BOOL_A1	 5)		;; and/ior/xor arg1
   (SFBOOL_BOOL_A2	 6)		;; and/ior/xor arg2
   (SFBOOL_SHL_D	 7)		;; shift left dest
   (SFBOOL_SHL_A	 8)		;; shift left arg
   (SFBOOL_MTVSR_D	 9)		;; move to vector dest
   (SFBOOL_MFVSR_A_V4SF	 10)		;; SFBOOL_MFVSR_A as V4SFmode
   (SFBOOL_BOOL_A_DI	 11)		;; SFBOOL_BOOL_A1/A2 as DImode
   (SFBOOL_TMP_VSX_DI	 12)		;; SFBOOL_TMP_VSX as DImode
   (SFBOOL_MTVSR_D_V4SF	 13)])		;; SFBOOL_MTVSR_D as V4SFmode

;; Attempt to optimize some common GLIBC operations using logical operations to
;; pick apart SFmode operations.  For example, there is code from e_powf.c
;; after macro expansion that looks like:
;;
;;	typedef union {
;;	  float value;
;;	  uint32_t word;
;;	} ieee_float_shape_type;
;;
;;	float t1;
;;	int32_t is;
;;
;;	do {
;;	  ieee_float_shape_type gf_u;
;;	  gf_u.value = (t1);
;;	  (is) = gf_u.word;
;;	} while (0);
;;
;;	do {
;;	  ieee_float_shape_type sf_u;
;;	  sf_u.word = (is & 0xfffff000);
;;	  (t1) = sf_u.value;
;;	} while (0);
;;
;; This would result in two direct move operations (convert to memory format,
;; direct move to GPR, do the AND operation, direct move to VSX, convert to
;; scalar format).  With this peephole, we eliminate the direct move to the
;; GPR, and instead move the integer mask value to the vector register after a
;; shift and do the VSX logical operation.

;; The insns for dealing with SFmode in GPR registers look like:
;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
;;
;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
;;
;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
;;
;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
;;
;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
;;
;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))

(define_peephole2
  [(match_scratch:DI SFBOOL_TMP_GPR "r")
   (match_scratch:V4SF SFBOOL_TMP_VSX "wa")

   ;; MFVSRWZ (aka zero_extend)
   (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
	(zero_extend:DI
	 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))

   ;; AND/IOR/XOR operation on int
   (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
	(and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
			(match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))

   ;; SLDI
   (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
	(ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
		   (const_int 32)))

   ;; MTVSRD
   (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
	(unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]

  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
   /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
      to compare registers, when the mode is different.  */
   && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
   && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
   && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
   && (REG_P (operands[SFBOOL_BOOL_A2])
       || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
   && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
       || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
   && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
       || (REG_P (operands[SFBOOL_BOOL_A2])
	   && REGNO (operands[SFBOOL_MFVSR_D])
		== REGNO (operands[SFBOOL_BOOL_A2])))
   && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
   && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
       || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
   && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
  [(set (match_dup SFBOOL_TMP_GPR)
	(ashift:DI (match_dup SFBOOL_BOOL_A_DI)
		   (const_int 32)))

   (set (match_dup SFBOOL_TMP_VSX_DI)
	(match_dup SFBOOL_TMP_GPR))

   (set (match_dup SFBOOL_MTVSR_D_V4SF)
	(and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
			  (match_dup SFBOOL_TMP_VSX)))]
{
  rtx bool_a1 = operands[SFBOOL_BOOL_A1];
  rtx bool_a2 = operands[SFBOOL_BOOL_A2];
  int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
  int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
  int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
  int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);

  if (CONST_INT_P (bool_a2))
    {
      rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
      emit_move_insn (tmp_gpr, bool_a2);
      operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
    }
  else
    {
      int regno_bool_a1 = REGNO (bool_a1);
      int regno_bool_a2 = REGNO (bool_a2);
      int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
			  ? regno_bool_a2 : regno_bool_a1);
      operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
    }

  operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
  operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
  operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
})