+2016-05-18 Michael Meissner <meissner@linux.vnet.ibm.com>
+
+ PR target/70915
+ * config/rs6000/constraints.md (wE constraint): New constraint
+ for a vector constant that can be loaded with XXSPLTIB.
+	(wM constraint): New constraint for a vector constant of all 1's.
+ (wS constraint): New constraint for a vector constant that can be
+ loaded with XXSPLTIB and a vector sign extend instruction.
+ * config/rs6000/predicates.md (xxspltib_constant_split): New
+ predicates for wE/wS constraints.
+ (xxspltib_constant_nosplit): Likewise.
+ (easy_vector_constant): Add support for constants that can be
+ loaded via XXSPLTIB.
+ (all_ones_constant): New predicate for vector constant with all
+ 1's set.
+ (splat_input_operand): Add support for ISA 3.0 word splat
+ operations.
+ * config/rs6000/rs6000.c (xxspltib_constant_p): New function to
+ return if a constant can be loaded with the ISA 3.0 XXSPLTIB
+ instruction and possibly with a sign extension.
+ (output_vec_const_move): Add support for XXSPLTIB. If we are
+ loading up 0/-1 into Altivec registers, prefer using VSPLTISW
+ instead of XXLXOR/XXLORC.
+ (rs6000_expand_vector_init): Add support for ISA 3.0 word splat
+ operations.
+ (rs6000_legitimize_reload_address): Likewise.
+ (rs6000_output_move_128bit): Use output_vec_const_move to emit
+ constants.
+ * config/rs6000/vsx.md (VSX_M): Add TImode (if -mvsx-timode) and
+ combine VSX_M and VSX_M2 into one iterator.
+ (VSX_M2): Likewise.
+ (VSINT_84): New iterators for loading constants with XXSPLTIB.
+ (VSINT_842): Likewise.
+ (UNSPEC_VSX_SIGN_EXTEND): New UNSPEC.
+ (xxspltib_v16qi): New insns to load up constants with the ISA 3.0
+ XXSPLTIB instruction.
+ (xxspltib_<mode>_nosplit): Likewise.
+ (xxspltib_<mode>_split): New insn to load up constants with
+ XXSPLTIB and a sign extend instruction.
+ (vsx_mov<mode>): Replace single move that handled all vector types
+ with separate 32-bit and 64-bit moves. Combine the movti_<bit>
+ moves (when -mvsx-timode is in effect) into the main vector
+ moves. Eliminate separate moves for <VSr> <VSa>, where the
+ preferred register class (<VSr>) is listed first, and the
+ secondary register class (<VSa>) is listed second with a '?' to
+ discourage use. Prefer loading 0/-1 in any VSX register for ISA
+ 3.0, and Altivec registers for ISA 2.06/2.07 (PR target/70915) so
+ that if the register was involved in a slow operation, the
+ clear/set operation does not wait for the slow operation to
+ finish. Adjust the length attributes for 32-bit mode. Use
+ rs6000_output_move_128bit and drop the use of the string
+ instructions for 32-bit movti when -mvsx-timode is in effect. Use
+ spacing so that the alternatives and attributes don't generate
+ long lines, and put things in columns, so that it is easier to
+ match up the operands and attributes with the insn alternatives.
+ (vsx_mov<mode>_64bit): Likewise.
+ (vsx_mov<mode>_32bit): Likewise.
+ (vsx_movti_64bit): Fold movti into normal vector moves.
+ (vsx_movti_32bit): Likewise.
+ (vsx_splat_<mode>, V4SI/V4SF modes): Add support for ISA 3.0 word
+	splat instructions.
+ (vsx_splat_v4si_internal): Likewise.
+ (vsx_splat_v4sf_internal): Likewise.
+ (vector fusion peepholes): Use VSX_M instead of VSX_M2.
+ (vsx_sign_extend_qi_<mode>): New ISA 3.0 instructions to sign
+ extend vector elements.
+ (vsx_sign_extend_hi_<mode>): Likewise.
+ (vsx_sign_extend_si_v2di): Likewise.
+ * config/rs6000/rs6000-protos.h (xxspltib_constant_p): Add
+ declaration.
+ * doc/md.texi (PowerPC constraints): Document the wE, wM, and wS
+ constraints. Add trailing period to wL documentation.
+
2016-05-18 Richard Sandiford <richard.sandiford@arm.com>
PR middle-end/71020
(and (match_code "const_int")
(match_test "TARGET_VSX && (ival == VECTOR_ELEMENT_SCALAR_64BIT)")))
+(define_constraint "wE"
+ "Vector constant that can be loaded with the XXSPLTIB instruction."
+ (match_test "xxspltib_constant_nosplit (op, mode)"))
+
;; Extended fusion store
(define_memory_constraint "wF"
"Memory operand suitable for power9 fusion load/stores"
(and (match_test "TARGET_DIRECT_MOVE_128")
(match_test "(ival == VECTOR_ELEMENT_MFVSRLD_64BIT)"))))
+;; Generate the XXLORC instruction to set a register to all 1's
+(define_constraint "wM"
+ "Match vector constant with all 1's if the XXLORC instruction is available"
+ (and (match_test "TARGET_P8_VECTOR")
+ (match_operand 0 "all_ones_constant")))
+
;; ISA 3.0 vector d-form addresses
(define_memory_constraint "wO"
"Memory operand suitable for the ISA 3.0 vector d-form instructions."
"Memory operand suitable for the load/store quad instructions"
(match_operand 0 "quad_memory_operand"))
+(define_constraint "wS"
+ "Vector constant that can be loaded with XXSPLTIB & sign extension."
+ (match_test "xxspltib_constant_split (op, mode)"))
+
;; Altivec style load/store that ignores the bottom bits of the address
(define_memory_constraint "wZ"
"Indexed or indirect memory operand, ignoring the bottom 4 bits"
}
})
+;; Return 1 if the operand is a CONST_VECTOR or VEC_DUPLICATE of a constant
+;; that can be loaded with an XXSPLTIB instruction and then a VUPKHSB,
+;; VEXTSB2W or VEXTSB2D instruction.
+
+(define_predicate "xxspltib_constant_split"
+ (match_code "const_vector,vec_duplicate,const_int")
+{
+ int value = 256;
+ int num_insns = -1;
+
+ if (!xxspltib_constant_p (op, mode, &num_insns, &value))
+ return false;
+
+ return num_insns > 1;
+})
+
+
+;; Return 1 if the operand is a CONST_VECTOR that can be loaded directly with
+;; an XXSPLTIB instruction.
+
+(define_predicate "xxspltib_constant_nosplit"
+ (match_code "const_vector,vec_duplicate,const_int")
+{
+ int value = 256;
+ int num_insns = -1;
+
+ if (!xxspltib_constant_p (op, mode, &num_insns, &value))
+ return false;
+
+ return num_insns == 1;
+})
+
;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
;; vector register without using memory.
(define_predicate "easy_vector_constant"
if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
{
- if (zero_constant (op, mode))
+ int value = 256;
+ int num_insns = -1;
+
+ if (zero_constant (op, mode) || all_ones_constant (op, mode))
+ return true;
+
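+      /* ISA 3.0 can synthesize the constant with XXSPLTIB, possibly followed
+	 by a sign extend, so treat it as an easy constant.  */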
+ if (TARGET_P9_VECTOR
+ && xxspltib_constant_p (op, mode, &num_insns, &value))
return true;
return easy_altivec_constant (op, mode);
(and (match_code "const_int,const_double,const_wide_int,const_vector")
(match_test "op == CONST0_RTX (mode)")))
+;; Return 1 if operand is constant -1 (scalars and vectors).
+(define_predicate "all_ones_constant"
+ (and (match_code "const_int,const_double,const_wide_int,const_vector")
+ (match_test "op == CONSTM1_RTX (mode) && !FLOAT_MODE_P (mode)")))
+
;; Return 1 if operand is 0.0.
(define_predicate "zero_fp_constant"
(and (match_code "const_double")
mode = V2DFmode;
else if (mode == DImode)
mode = V2DImode;
+ else if (mode == SImode && TARGET_P9_VECTOR)
+ mode = V4SImode;
+ else if (mode == SFmode && TARGET_P9_VECTOR)
+ mode = V4SFmode;
else
gcc_unreachable ();
return memory_address_addr_space_p (mode, XEXP (op, 0),
#endif /* TREE_CODE */
extern bool easy_altivec_constant (rtx, machine_mode);
+extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
extern int vspltis_shifted (rtx);
extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
gcc_unreachable ();
}
+/* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
+   instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
+
+   Return the number of instructions needed (1 or 2) in the location pointed
+   to by NUM_INSNS_PTR.  Constants that need only the XXSPLTIB instruction
+   can go in any VSX register, while constants that also need a sign extend
+   operation are restricted to the Altivec registers.
+
+   Allow either (const_vector [...]) or (vec_duplicate <const>).  If OP is a
+   valid XXSPLTIB constant, return the constant being set via the
+   CONSTANT_PTR pointer.  */
+
+bool
+xxspltib_constant_p (rtx op,
+ machine_mode mode,
+ int *num_insns_ptr,
+ int *constant_ptr)
+{
+ size_t nunits = GET_MODE_NUNITS (mode);
+ size_t i;
+ HOST_WIDE_INT value;
+ rtx element;
+
+  /* Set the returned values to out-of-bounds values.  */
+ *num_insns_ptr = -1;
+ *constant_ptr = 256;
+
+ if (!TARGET_P9_VECTOR)
+ return false;
+
+ if (mode == VOIDmode)
+ mode = GET_MODE (op);
+
+ else if (mode != GET_MODE (op))
+ return false;
+
+ /* Handle (vec_duplicate <constant>). */
+ if (GET_CODE (op) == VEC_DUPLICATE)
+ {
+ if (mode != V16QImode && mode != V8HImode && mode != V4SImode
+ && mode != V2DImode)
+ return false;
+
+ element = XEXP (op, 0);
+ if (!CONST_INT_P (element))
+ return false;
+
+ value = INTVAL (element);
+ if (!IN_RANGE (value, -128, 127))
+ return false;
+ }
+
+ /* Handle (const_vector [...]). */
+ else if (GET_CODE (op) == CONST_VECTOR)
+ {
+ if (mode != V16QImode && mode != V8HImode && mode != V4SImode
+ && mode != V2DImode)
+ return false;
+
+ element = CONST_VECTOR_ELT (op, 0);
+ if (!CONST_INT_P (element))
+ return false;
+
+ value = INTVAL (element);
+ if (!IN_RANGE (value, -128, 127))
+ return false;
+
+ for (i = 1; i < nunits; i++)
+ {
+ element = CONST_VECTOR_ELT (op, i);
+ if (!CONST_INT_P (element))
+ return false;
+
+ if (value != INTVAL (element))
+ return false;
+ }
+
+ /* See if we could generate vspltisw/vspltish directly instead of
+ xxspltib + sign extend. Special case 0/-1 to allow getting
+ any VSX register instead of an Altivec register. */
+ if (!IN_RANGE (value, -1, 0) && EASY_VECTOR_15 (value)
+ && (mode == V4SImode || mode == V8HImode))
+ return false;
+ }
+
+ /* Handle integer constants being loaded into the upper part of the VSX
+ register as a scalar. If the value isn't 0/-1, only allow it if
+ the mode can go in Altivec registers. */
+ else if (CONST_INT_P (op))
+ {
+ if (!SCALAR_INT_MODE_P (mode))
+ return false;
+
+ value = INTVAL (op);
+ if (!IN_RANGE (value, -128, 127))
+ return false;
+
+ if (!IN_RANGE (value, -1, 0)
+ && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
+ return false;
+ }
+
+ else
+ return false;
+
+ /* Return # of instructions and the constant byte for XXSPLTIB. */
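+  /* A V16QImode splat needs no sign extend, and splatting 0 or -1 as bytes
+     already yields the correct bit pattern for any element size, so those
+     cases take one instruction; every other value also needs a sign extend.  */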
+ if (mode == V16QImode)
+ *num_insns_ptr = 1;
+
+ else if (IN_RANGE (value, -1, 0))
+ *num_insns_ptr = 1;
+
+ else
+ *num_insns_ptr = 2;
+
+ *constant_ptr = (int) value;
+ return true;
+}
+
const char *
output_vec_const_move (rtx *operands)
{
if (TARGET_VSX)
{
+ bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
+ int xxspltib_value = 256;
+ int num_insns = -1;
+
if (zero_constant (vec, mode))
- return "xxlxor %x0,%x0,%x0";
+ {
+ if (TARGET_P9_VECTOR)
+ return "xxspltib %x0,0";
+
+ else if (dest_vmx_p)
+ return "vspltisw %0,0";
+
+ else
+ return "xxlxor %x0,%x0,%x0";
+ }
+
+ if (all_ones_constant (vec, mode))
+ {
+ if (TARGET_P9_VECTOR)
+ return "xxspltib %x0,255";
- if (TARGET_P8_VECTOR && vec == CONSTM1_RTX (mode))
- return "xxlorc %x0,%x0,%x0";
+ else if (dest_vmx_p)
+ return "vspltisw %0,-1";
- if ((mode == V2DImode || mode == V1TImode)
- && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
- && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
- return (TARGET_P8_VECTOR) ? "xxlorc %x0,%x0,%x0" : "vspltisw %0,-1";
+ else if (TARGET_P8_VECTOR)
+ return "xxlorc %x0,%x0,%x0";
+
+ else
+ gcc_unreachable ();
+ }
+
+ if (TARGET_P9_VECTOR
+ && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
+ {
+ if (num_insns == 1)
+ {
+ operands[2] = GEN_INT (xxspltib_value & 0xff);
+ return "xxspltib %x0,%2";
+ }
+
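+      /* A constant that also needs a sign extend cannot be emitted as a
+	 single instruction; "#" forces the move to be split later.  */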
+ return "#";
+ }
}
if (TARGET_ALTIVEC)
{
rtx splat_vec;
+
+ gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
if (zero_constant (vec, mode))
- return "vxor %0,%0,%0";
+ return "vspltisw %0,0";
+
+ if (all_ones_constant (vec, mode))
+ return "vspltisw %0,-1";
/* Do we need to construct a value using VSLDOI? */
shift = vspltis_shifted (vec);
return;
}
+ /* Word values on ISA 3.0 can use mtvsrws, lxvwsx, or vspltisw. V4SF is
+ complicated since scalars are stored as doubles in the registers. */
+ if (TARGET_P9_VECTOR && mode == V4SImode && all_same
+ && VECTOR_MEM_VSX_P (mode))
+ {
+ emit_insn (gen_vsx_splat_v4si (target, XVECEXP (vals, 0, 0)));
+ return;
+ }
+
/* With single precision floating point on VSX, know that internally single
precision is actually represented as a double, and either make 2 V2DF
vectors, and convert these vectors to single precision, or do one
{
if (all_same)
{
- rtx freg = gen_reg_rtx (V4SFmode);
- rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
- rtx cvt = ((TARGET_XSCVDPSPN)
- ? gen_vsx_xscvdpspn_scalar (freg, sreg)
- : gen_vsx_xscvdpsp_scalar (freg, sreg));
+ rtx op0 = XVECEXP (vals, 0, 0);
+
+ if (TARGET_P9_VECTOR)
+ emit_insn (gen_vsx_splat_v4sf (target, op0));
- emit_insn (cvt);
- emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
+ else
+ {
+ rtx freg = gen_reg_rtx (V4SFmode);
+ rtx sreg = force_reg (SFmode, op0);
+ rtx cvt = (TARGET_XSCVDPSPN
+ ? gen_vsx_xscvdpspn_scalar (freg, sreg)
+ : gen_vsx_xscvdpsp_scalar (freg, sreg));
+
+ emit_insn (cvt);
+ emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
+ const0_rtx));
+ }
}
else
{
{
bool reg_offset_p = reg_offset_addressing_ok_p (mode);
- /* Nasty hack for vsx_splat_V2DF/V2DI load from mem, which takes a
- DFmode/DImode MEM. */
+ /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
+ DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
if (reg_offset_p
&& opnum == 1
&& ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
- || (mode == DImode && recog_data.operand_mode[0] == V2DImode)))
+ || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
+ || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
+ && TARGET_P9_VECTOR)
+ || (mode == SImode && recog_data.operand_mode[0] == V4SImode
+ && TARGET_P9_VECTOR)))
reg_offset_p = false;
/* We must recognize output that we have already generated ourselves. */
if (dest_gpr_p)
return "#";
- else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
- return "xxlxor %x0,%x0,%x0";
-
- else if (TARGET_ALTIVEC && dest_vmx_p)
+ else if ((dest_vmx_p && TARGET_ALTIVEC)
+ || (dest_vsx_p && TARGET_VSX))
return output_vec_const_move (operands);
}
(KF "FLOAT128_VECTOR_P (KFmode)")
(TF "FLOAT128_VECTOR_P (TFmode)")])
-;; Iterator for memory move. Handle TImode specially to allow
-;; it to use gprs as well as vsx registers.
+;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
V8HI
V4SI
V2DF
V1TI
(KF "FLOAT128_VECTOR_P (KFmode)")
- (TF "FLOAT128_VECTOR_P (TFmode)")])
-
-(define_mode_iterator VSX_M2 [V16QI
- V8HI
- V4SI
- V2DI
- V4SF
- V2DF
- V1TI
- (KF "FLOAT128_VECTOR_P (KFmode)")
- (TF "FLOAT128_VECTOR_P (TFmode)")
- (TI "TARGET_VSX_TIMODE")])
+ (TF "FLOAT128_VECTOR_P (TFmode)")
+ (TI "TARGET_VSX_TIMODE")])
;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm [(V16QI "vw4")
(define_mode_attr VS_64reg [(V2DF "ws")
(V2DI "wi")])
+;; Iterators for loading constants with xxspltib
+(define_mode_iterator VSINT_84 [V4SI V2DI])
+(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
+
;; Constants for creating unspecs
(define_c_enum "unspec"
[UNSPEC_VSX_CONCAT
UNSPEC_VSX_XVCVUXDDP
UNSPEC_VSX_XVCVDPSXDS
UNSPEC_VSX_XVCVDPUXDS
+ UNSPEC_VSX_SIGN_EXTEND
])
;; VSX moves
(const_int 64)))]
"")
-(define_insn "*vsx_mov<mode>"
- [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=ZwO,<VSr>,<VSr>,?ZwO,?<VSa>,?<VSa>,r,we,wQ,?&r,??Y,??r,??r,<VSr>,?<VSa>,*r,v,wZ,v")
- (match_operand:VSX_M 1 "input_operand" "<VSr>,ZwO,<VSr>,<VSa>,ZwO,<VSa>,we,b,r,wQ,r,Y,r,j,j,j,W,v,wZ"))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (register_operand (operands[0], <MODE>mode)
- || register_operand (operands[1], <MODE>mode))"
+;; Vector constants that can be generated with the XXSPLTIB instruction,
+;; which was added in ISA 3.0.  Both (const_vector [..]) and
+;; (vec_duplicate ...) forms are recognized.
+(define_insn "xxspltib_v16qi"
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+ (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
+ "TARGET_P9_VECTOR"
{
- return rs6000_output_move_128bit (operands);
+ operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
+ return "xxspltib %x0,%2";
}
- [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,mffgpr,mftgpr,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload")
- (set_attr "length" "4,4,4,4,4,4,8,4,12,12,12,12,16,4,4,*,16,4,4")])
-
-;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal
-;; use of TImode is for unions. However for plain data movement, slightly
-;; favor the vector loads
-(define_insn "*vsx_movti_64bit"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=ZwO,wa,wa,wa,r,we,v,v,wZ,wQ,&r,Y,r,r,?r")
- (match_operand:TI 1 "input_operand" "wa,ZwO,wa,O,we,b,W,wZ,v,r,wQ,r,Y,r,n"))]
- "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
- && (register_operand (operands[0], TImode)
- || register_operand (operands[1], TImode))"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "xxspltib_<mode>_nosplit"
+ [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa")
+ (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "wE"))]
+ "TARGET_P9_VECTOR"
{
- return rs6000_output_move_128bit (operands);
+ rtx op1 = operands[1];
+ int value = 256;
+ int num_insns = -1;
+
+ if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
+ || num_insns != 1)
+ gcc_unreachable ();
+
+ operands[2] = GEN_INT (value & 0xff);
+ return "xxspltib %x0,%2";
}
- [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,mffgpr,mftgpr,vecsimple,vecstore,vecload,store,load,store,load,*,*")
- (set_attr "length" "4,4,4,4,8,4,16,4,4,8,8,8,8,8,8")])
-
-(define_insn "*vsx_movti_32bit"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=ZwO,wa,wa,wa,v,v,wZ,Q,Y,????r,????r,????r,r")
- (match_operand:TI 1 "input_operand" "wa,ZwO,wa,O,W,wZ,v,r,r,Q,Y,r,n"))]
- "! TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
- && (register_operand (operands[0], TImode)
- || register_operand (operands[1], TImode))"
+ [(set_attr "type" "vecperm")])
+
+(define_insn_and_split "*xxspltib_<mode>_split"
+ [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
+ (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
+ "TARGET_P9_VECTOR"
+ "#"
+ "&& 1"
+ [(const_int 0)]
{
- switch (which_alternative)
- {
- case 0:
- return "stxvd2x %x1,%y0";
+ int value = 256;
+ int num_insns = -1;
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
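+  /* Build the intermediate V16QImode splat in a new pseudo if we can still
+     create one; otherwise reuse the destination register viewed as V16QImode.  */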
+ rtx tmp = ((can_create_pseudo_p ())
+ ? gen_reg_rtx (V16QImode)
+ : gen_lowpart (V16QImode, op0));
- case 1:
- return "lxvd2x %x0,%y1";
+ if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
+ || num_insns != 2)
+ gcc_unreachable ();
- case 2:
- return "xxlor %x0,%x1,%x1";
+ emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
- case 3:
- return "xxlxor %x0,%x0,%x0";
+ if (<MODE>mode == V2DImode)
+ emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));
- case 4:
- return output_vec_const_move (operands);
+ else if (<MODE>mode == V4SImode)
+ emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));
+
+ else if (<MODE>mode == V8HImode)
+ emit_insn (gen_altivec_vupkhsb (op0, tmp));
+
+ else
+ gcc_unreachable ();
- case 5:
- return "stvx %1,%y0";
+ DONE;
+}
+ [(set_attr "type" "vecperm")
+ (set_attr "length" "8")])
- case 6:
- return "lvx %0,%y1";
- case 7:
- if (TARGET_STRING)
- return \"stswi %1,%P0,16\";
+;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTIB
+;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
+;; all 1's, since the machine does not have to wait for the previous
+;; instruction using the register being set (such as a store waiting on a slow
+;; instruction).  But generate XXLXOR/XXLORC if it will avoid a register move.
- case 8:
- return \"#\";
+;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
+;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
+;; VSX 0/-1 GPR 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
+(define_insn "*vsx_mov<mode>_64bit"
+ [(set (match_operand:VSX_M 0 "nonimmediate_operand"
+ "=ZwO, <VSa>, <VSa>, r, we, ?wQ,
+ ?&r, ??r, ??Y, ??r, wo, v,
+ ?<VSa>, *r, v, ??r, wZ, v")
- case 9:
- /* If the address is not used in the output, we can use lsi. Otherwise,
- fall through to generating four loads. */
- if (TARGET_STRING
- && ! reg_overlap_mentioned_p (operands[0], operands[1]))
- return \"lswi %0,%P1,16\";
- /* ... fall through ... */
+ (match_operand:VSX_M 1 "input_operand"
+ "<VSa>, ZwO, <VSa>, we, r, r,
+ wQ, Y, r, r, wE, jwM,
+ ?jwM, jwM, W, W, v, wZ"))]
- case 10:
- case 11:
- case 12:
- return \"#\";
- default:
- gcc_unreachable ();
- }
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+{
+ return rs6000_output_move_128bit (operands);
+}
+ [(set_attr "type"
+ "vecstore, vecload, vecsimple, mffgpr, mftgpr, load,
+ store, load, store, *, vecsimple, vecsimple,
+ vecsimple, *, *, *, vecstore, vecload")
+
+ (set_attr "length"
+ "4, 4, 4, 8, 4, 8,
+ 8, 8, 8, 8, 4, 4,
+ 4, 8, 20, 20, 4, 4")])
+
+;; VSX store VSX load VSX move GPR load GPR store GPR move
+;; XXSPLTIB VSPLTISW VSX 0/-1 GPR 0/-1 VMX const GPR const
+;; LVX (VMX) STVX (VMX)
+(define_insn "*vsx_mov<mode>_32bit"
+ [(set (match_operand:VSX_M 0 "nonimmediate_operand"
+ "=ZwO, <VSa>, <VSa>, ??r, ??Y, ??r,
+ wo, v, ?<VSa>, *r, v, ??r,
+ wZ, v")
+
+ (match_operand:VSX_M 1 "input_operand"
+ "<VSa>, ZwO, <VSa>, Y, r, r,
+ wE, jwM, ?jwM, jwM, W, W,
+ v, wZ"))]
+
+ "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+{
+ return rs6000_output_move_128bit (operands);
}
- [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,store,load,load, *, *")
- (set_attr "update" " *, *, *, *, *, *, *, yes, yes, yes, yes, *, *")
- (set_attr "length" " 4, 4, 4, 4, 8, 4, 4, 16, 16, 16, 16,16,16")
- (set (attr "cell_micro") (if_then_else (match_test "TARGET_STRING")
- (const_string "always")
- (const_string "conditional")))])
+ [(set_attr "type"
+ "vecstore, vecload, vecsimple, load, store, *,
+ vecsimple, vecsimple, vecsimple, *, *, *,
+ vecstore, vecload")
+
+ (set_attr "length"
+ "4, 4, 4, 16, 16, 16,
+ 4, 4, 4, 16, 20, 32,
+ 4, 4")])
;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
lxvdsx %x0,%y1"
[(set_attr "type" "vecperm,vecperm,vecload,vecperm,vecperm,vecload")])
-;; V4SF/V4SI splat
+;; V4SI splat (ISA 3.0)
+;; When SI's are allowed in VSX registers, add XXSPLTW support
+(define_expand "vsx_splat_<mode>"
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "")
+ (vec_duplicate:VSX_W
+ (match_operand:<VS_scalar> 1 "splat_input_operand" "")))]
+ "TARGET_P9_VECTOR"
+{
+ if (MEM_P (operands[1]))
+ operands[1] = rs6000_address_for_fpconvert (operands[1]);
+ else if (!REG_P (operands[1]))
+ operands[1] = force_reg (<VS_scalar>mode, operands[1]);
+})
+
+(define_insn "*vsx_splat_v4si_internal"
+ [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
+ (vec_duplicate:V4SI
+ (match_operand:SI 1 "reg_or_indexed_operand" "r,Z")))]
+ "TARGET_P9_VECTOR"
+ "@
+ mtvsrws %x0,%1
+ lxvwsx %x0,%y1"
+ [(set_attr "type" "mftgpr,vecload")])
+
+;; V4SF splat (ISA 3.0)
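+;; When the input is already in a VSX register, the splat is emitted as "#"
+;; and split after reload into xscvdpspn (the SFmode scalar is held in the
+;; register in double format) followed by an xxspltw of element 0.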
+(define_insn_and_split "*vsx_splat_v4sf_internal"
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
+ (vec_duplicate:V4SF
+ (match_operand:SF 1 "reg_or_indexed_operand" "Z,wy,r")))]
+ "TARGET_P9_VECTOR"
+ "@
+ lxvwsx %x0,%y1
+ #
+ mtvsrws %x0,%1"
+ "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
+ [(set (match_dup 0)
+ (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
+ (set (match_dup 0)
+ (vec_duplicate:V4SF
+ (vec_select:SF (match_dup 0)
+ (parallel [(const_int 0)]))))]
+ ""
+ [(set_attr "type" "vecload,vecperm,mftgpr")
+ (set_attr "length" "4,8,4")])
+
+;; V4SF/V4SI splat from a vector element
(define_insn "vsx_xxspltw_<mode>"
[(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
(vec_duplicate:VSX_W
(define_peephole
[(set (match_operand:P 0 "base_reg_operand" "")
(match_operand:P 1 "short_cint_operand" ""))
- (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
- (mem:VSX_M2 (plus:P (match_dup 0)
- (match_operand:P 3 "int_reg_operand" ""))))]
+ (set (match_operand:VSX_M 2 "vsx_register_operand" "")
+ (mem:VSX_M (plus:P (match_dup 0)
+ (match_operand:P 3 "int_reg_operand" ""))))]
"TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
- "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
+ "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M:VSm>x %x2,%0,%3"
[(set_attr "length" "8")
(set_attr "type" "vecload")])
(define_peephole
[(set (match_operand:P 0 "base_reg_operand" "")
(match_operand:P 1 "short_cint_operand" ""))
- (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
- (mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "")
- (match_dup 0))))]
+ (set (match_operand:VSX_M 2 "vsx_register_operand" "")
+ (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand" "")
+ (match_dup 0))))]
"TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
- "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
+ "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M:VSm>x %x2,%0,%3"
[(set_attr "length" "8")
(set_attr "type" "vecload")])
+
+\f
+;; ISA 3.0 vector extend sign support
+
+(define_insn "vsx_sign_extend_qi_<mode>"
+ [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
+ (unspec:VSINT_84
+ [(match_operand:V16QI 1 "vsx_register_operand" "v")]
+ UNSPEC_VSX_SIGN_EXTEND))]
+ "TARGET_P9_VECTOR"
+ "vextsb2<wd> %0,%1"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "*vsx_sign_extend_hi_<mode>"
+ [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
+ (unspec:VSINT_84
+ [(match_operand:V8HI 1 "vsx_register_operand" "v")]
+ UNSPEC_VSX_SIGN_EXTEND))]
+ "TARGET_P9_VECTOR"
+ "vextsh2<wd> %0,%1"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "*vsx_sign_extend_si_v2di"
+ [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
+ (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
+ UNSPEC_VSX_SIGN_EXTEND))]
+ "TARGET_P9_VECTOR"
+ "vextsw2d %0,%1"
+ [(set_attr "type" "vecsimple")])
@item wD
Int constant that is the element number of the 64-bit scalar in a vector.
+@item wE
+Vector constant that can be loaded with the XXSPLTIB instruction.
+
@item wF
Memory operand suitable for power9 fusion load/stores.
Memory operand suitable for TOC fusion memory references.
@item wL
 Int constant that is the element number that the MFVSRLD instruction
-targets
+targets.
+@item wM
+Match vector constant with all 1's if the XXLORC instruction is available.
+
@item wO
A memory operand suitable for the ISA 3.0 vector d-form instructions.
A memory address that will work with the @code{lq} and @code{stq}
instructions.
+@item wS
+Vector constant that can be loaded with XXSPLTIB & sign extension.
+
@item h
@samp{MQ}, @samp{CTR}, or @samp{LINK} register
+2016-05-18 Michael Meissner <meissner@linux.vnet.ibm.com>
+
+ * gcc.target/powerpc/p9-splat-1.c: New tests for ISA 3.0 word
+ splat operations and the XXSPLTIB instruction.
+ * gcc.target/powerpc/p9-splat-2.c: Likewise.
+ * gcc.target/powerpc/p9-splat-3.c: Likewise.
+ * gcc.target/powerpc/pr47755.c: Allow vspltisw in addition to
+ xxlxor to clear a register.
+
2016-05-18 Richard Sandiford <richard.sandiford@arm.com>
* gcc.dg/torture/pr71020.c: New test.
--- /dev/null
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O2" } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+
+#include <altivec.h>
+
+vector int
+foo_r (int a)
+{
+ return (vector int) { a, a, a, a }; /* mtvsrws */
+}
+
+vector int
+foo_r2 (int a)
+{
+ return vec_splats (a); /* mtvsrws */
+}
+
+vector int
+foo_p (int *a)
+{
+ return (vector int) { *a, *a, *a, *a }; /* lxvwsx */
+}
+
+/* { dg-final { scan-assembler-times "mtvsrws" 2 } } */
+/* { dg-final { scan-assembler-times "lxvwsx" 1 } } */
--- /dev/null
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O2" } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+
+#include <altivec.h>
+
+vector float
+foo_r (float a)
+{
+ return (vector float) { a, a, a, a }; /* xscvdpspn/xxspltw */
+}
+
+vector float
+foo_r2 (float a)
+{
+ return vec_splats (a); /* xscvdpspn/xxspltw */
+}
+
+vector float
+foo_g (float *a)
+{
+ float f = *a;
+
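+  /* Force the value through a GPR so that the splat has to use mtvsrws.  */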
+ __asm__ (" # %0" : "+r" (f));
+ return (vector float) { f, f, f, f }; /* mtvsrws */
+}
+
+vector float
+foo_p (float *a)
+{
+ return (vector float) { *a, *a, *a, *a }; /* lxvwsx */
+}
+
+/* { dg-final { scan-assembler-times "xscvdpspn" 2 } } */
+/* { dg-final { scan-assembler-times "xxspltw" 2 } } */
+/* { dg-final { scan-assembler-times "mtvsrws" 1 } } */
+/* { dg-final { scan-assembler-times "lxvwsx" 1 } } */
--- /dev/null
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O2" } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+
+#include <altivec.h>
+
+typedef vector signed char v16qi_t;
+typedef vector short v8hi_t;
+typedef vector int v4si_t;
+typedef vector long long v2di_t;
+
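+/* Each store below should splat the constant with register operations
+   (xxspltib, vspltis[bhw], and the ISA 3.0 sign extends) rather than loading
+   the vector from memory.  */
+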
+void v16qi_0a (v16qi_t *p) { *p = (v16qi_t) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; }
+void v8hi_0a (v8hi_t *p) { *p = (v8hi_t) { 0, 0, 0, 0, 0, 0, 0, 0 }; }
+void v4si_0a (v4si_t *p) { *p = (v4si_t) { 0, 0, 0, 0 }; }
+void v2di_0a (v2di_t *p) { *p = (v2di_t) { 0, 0 }; }
+
+void v16qi_0b (v16qi_t *p) { *p = (v16qi_t) vec_splats ((signed char)0); }
+void v8hi_0b (v8hi_t *p) { *p = (v8hi_t) vec_splats ((short)0); }
+void v4si_0b (v4si_t *p) { *p = (v4si_t) vec_splats ((int)0); }
+void v2di_0b (v2di_t *p) { *p = (v2di_t) vec_splats ((long long)0); }
+
+void v16qi_m1a (v16qi_t *p) { *p = (v16qi_t) { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; }
+void v8hi_m1a (v8hi_t *p) { *p = (v8hi_t) { -1, -1, -1, -1, -1, -1, -1, -1 }; }
+void v4si_m1a (v4si_t *p) { *p = (v4si_t) { -1, -1, -1, -1 }; }
+void v2di_m1a (v2di_t *p) { *p = (v2di_t) { -1, -1 }; }
+
+void v16qi_m1b (v16qi_t *p) { *p = (v16qi_t) vec_splats ((signed char)-1); }
+void v8hi_m1b (v8hi_t *p) { *p = (v8hi_t) vec_splats ((short)-1); }
+void v4si_m1b (v4si_t *p) { *p = (v4si_t) vec_splats ((int)-1); }
+void v2di_m1b (v2di_t *p) { *p = (v2di_t) vec_splats ((long long)-1); }
+
+void v16qi_5a (v16qi_t *p) { *p = (v16qi_t) { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 }; }
+void v8hi_5a (v8hi_t *p) { *p = (v8hi_t) { 5, 5, 5, 5, 5, 5, 5, 5 }; }
+void v4si_5a (v4si_t *p) { *p = (v4si_t) { 5, 5, 5, 5 }; }
+void v2di_5a (v2di_t *p) { *p = (v2di_t) { 5, 5 }; }
+
+void v16qi_5b (v16qi_t *p) { *p = (v16qi_t) vec_splats ((signed char)5); }
+void v8hi_5b (v8hi_t *p) { *p = (v8hi_t) vec_splats ((short)5); }
+void v4si_5b (v4si_t *p) { *p = (v4si_t) vec_splats ((int)5); }
+void v2di_5b (v2di_t *p) { *p = (v2di_t) vec_splats ((long long)5); }
+
+void v16qi_33a (v16qi_t *p) { *p = (v16qi_t) { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33 }; }
+void v8hi_33a (v8hi_t *p) { *p = (v8hi_t) { 33, 33, 33, 33, 33, 33, 33, 33 }; }
+void v4si_33a (v4si_t *p) { *p = (v4si_t) { 33, 33, 33, 33 }; }
+void v2di_33a (v2di_t *p) { *p = (v2di_t) { 33, 33 }; }
+
+void v16qi_33b (v16qi_t *p) { *p = (v16qi_t) vec_splats ((signed char)33); }
+void v8hi_33b (v8hi_t *p) { *p = (v8hi_t) vec_splats ((short)33); }
+void v4si_33b (v4si_t *p) { *p = (v4si_t) vec_splats ((int)33); }
+void v2di_33b (v2di_t *p) { *p = (v2di_t) vec_splats ((long long)33); }
+
+/* { dg-final { scan-assembler "xxspltib" } } */
+/* { dg-final { scan-assembler "vextsb2d" } } */
+/* { dg-final { scan-assembler "vextsb2w" } } */
+/* { dg-final { scan-assembler "vupk\[hl\]sb" } } */
+/* { dg-final { scan-assembler-not "lxvd2x" } } */
+/* { dg-final { scan-assembler-not "lxvw4x" } } */
+/* { dg-final { scan-assembler-not "lxv " } } */
+/* { dg-final { scan-assembler-not "lxvx" } } */
+/* { dg-final { scan-assembler-not "lvx" } } */
/* { dg-require-effective-target powerpc_vsx_ok } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */
/* { dg-options "-O3 -mcpu=power7" } */
-/* { dg-final { scan-assembler "xxlxor" } } */
+/* { dg-final { scan-assembler "xxlxor\|vspltis\[bhw\]" } } */
/* { dg-final { scan-assembler-not "lxvd2x" } } */
/* { dg-final { scan-assembler-not "lxvw4x" } } */
/* { dg-final { scan-assembler-not "lvx" } } */