rs6000.c (rs6000_expand_vector_init): Set initialization of all 0's to the 0 constant...

author Michael Meissner <meissner@linux.vnet.ibm.com>

Tue, 23 Aug 2016 20:41:32 +0000 (20:41 +0000)

committer Michael Meissner <meissner@gcc.gnu.org>

Tue, 23 Aug 2016 20:41:32 +0000 (20:41 +0000)
author Michael Meissner <meissner@linux.vnet.ibm.com>
Tue, 23 Aug 2016 20:41:32 +0000 (20:41 +0000)
committer Michael Meissner <meissner@gcc.gnu.org>
Tue, 23 Aug 2016 20:41:32 +0000 (20:41 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index a585755261e2e029c60555520ba72b171835202f..e1ecbed5e151e165df0bc15c2f6b27bd82bf7cf0 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,53 @@
+2016-08-23  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+       * config/rs6000/rs6000.c (rs6000_expand_vector_init): Set
+       initialization of all 0's to the 0 constant, instead of directly
+       generating XOR.  Add support for V4SImode vector initialization on
+       64-bit systems with direct move, and rework the ISA 3.0 V4SImode
+       initialization.  Change variables used in V4SFmode vector
+       initialization.  For V4SFmode vector splat on ISA 3.0, make sure
+       any memory addresses are in index form.  Add support for using
+       VSPLTH/VSPLTB to initialize vector short and vector char vectors
+       with all of the same element.
+       (regno_or_subregno): New helper function to return a register
+       number for either REG or SUBREG.
+       (rs6000_adjust_vec_address): Do not generate ADDI <reg>,R0,<num>.
+       Use regno_or_subregno where possible.
+       (rs6000_split_v4si_init_di_reg): New helper function to build up a
+       DImode value from two SImode values in order to generate V4SImode
+       vector initialization on 64-bit systems with direct move.
+       (rs6000_split_v4si_init): Split up the insns for a V4SImode vector
+       initialization.
+       (rtx_is_swappable_p): V4SImode vector initialization insn is not
+       swappable.
+       * config/rs6000/rs6000-protos.h (rs6000_split_v4si_init): Add
+       declaration.
+       * config/rs6000/vsx.md (VSX_SPLAT_I): New mode iterators and
+       attributes to initialize V8HImode and V16QImode vectors with the
+       same element.
+       (VSX_SPLAT_COUNT): Likewise.
+       (VSX_SPLAT_SUFFIX): Likewise.
+       (UNSPEC_VSX_VEC_INIT): New unspec.
+       (vsx_concat_v2sf): Eliminate using 'preferred' register classes.
+       Allow SFmode values to come from Altivec registers.
+       (vsx_init_v4si): New insn/split for V4SImode vector initialization
+       on 64-bit systems with direct move.
+       (vsx_splat_<mode>, VSX_W iterator): Rework V4SImode and V4SFmode
+       vector initializations, to allow V4SImode vector initializations
+       on 64-bit systems with direct move.
+       (vsx_splat_v4si): Likewise.
+       (vsx_splat_v4si_di): Likewise.
+       (vsx_splat_v4sf): Likewise.
+       (vsx_splat_v4sf_internal): Likewise.
+       (vsx_xxspltw_<mode>, VSX_W iterator): Eliminate using 'preferred'
+       register classes.
+       (vsx_xxspltw_<mode>_direct, VSX_W iterator): Likewise.
+       (vsx_vsplt<VSX_SPLAT_SUFFIX>_di): New insns to support
+       initializing V8HImode and V16QImode vectors with the same
+       element.
+       * config/rs6000/rs6000.h (TARGET_DIRECT_MOVE_64BIT): Disallow
+       optimization if -maltivec=be.
+
  2016-08-23  Christophe Lyon  <christophe.lyon@linaro.org>
  
         * config/arm/arm.md (arm_movqi_insn): Swap predicable_short_it
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h

index c15703b12e06df9e8a06bc35770162a418fbbae2..6ed7024da1648d6dfefdf910a4771bca5a1af32d 100644 (file)
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -65,6 +65,7 @@ extern void rs6000_expand_vector_set (rtx, rtx, int);
  extern void rs6000_expand_vector_extract (rtx, rtx, rtx);
  extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx);
  extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode);
+extern void rs6000_split_v4si_init (rtx []);
  extern bool altivec_expand_vec_perm_const (rtx op[4]);
  extern void altivec_expand_vec_perm_le (rtx op[4]);
  extern bool rs6000_expand_vec_perm_const (rtx op[4]);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c

index b232099b9e7d536b266e77fc5d8d7fc970869086..4de70ea86efd09d3df0d8d39091cce8d6b4c4743 100644 (file)
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6695,7 +6695,7 @@ rs6000_expand_vector_init (rtx target, rtx vals)
        if ((int_vector_p || TARGET_VSX) && all_const_zero)
         {
           /* Zero register.  */
-         emit_insn (gen_rtx_SET (target, gen_rtx_XOR (mode, target, target)));
+         emit_move_insn (target, CONST0_RTX (mode));
           return;
         }
        else if (int_vector_p && easy_vector_constant (const_vec, mode))
@@ -6738,32 +6738,69 @@ rs6000_expand_vector_init (rtx target, rtx vals)
        return;
      }
  
-  /* Word values on ISA 3.0 can use mtvsrws, lxvwsx, or vspltisw.  V4SF is
-     complicated since scalars are stored as doubles in the registers.  */
-  if (TARGET_P9_VECTOR && mode == V4SImode && all_same
-      && VECTOR_MEM_VSX_P (mode))
+  /* Special case initializing vector int if we are on 64-bit systems with
+     direct move or we have the ISA 3.0 instructions.  */
+  if (mode == V4SImode  && VECTOR_MEM_VSX_P (V4SImode)
+      && TARGET_DIRECT_MOVE_64BIT)
      {
-      emit_insn (gen_vsx_splat_v4si (target, XVECEXP (vals, 0, 0)));
-      return;
+      if (all_same)
+       {
+         rtx element0 = XVECEXP (vals, 0, 0);
+         if (MEM_P (element0))
+           element0 = rs6000_address_for_fpconvert (element0);
+         else
+           element0 = force_reg (SImode, element0);
+
+         if (TARGET_P9_VECTOR)
+           emit_insn (gen_vsx_splat_v4si (target, element0));
+         else
+           {
+             rtx tmp = gen_reg_rtx (DImode);
+             emit_insn (gen_zero_extendsidi2 (tmp, element0));
+             emit_insn (gen_vsx_splat_v4si_di (target, tmp));
+           }
+         return;
+       }
+      else
+       {
+         rtx elements[4];
+         size_t i;
+
+         for (i = 0; i < 4; i++)
+           {
+             elements[i] = XVECEXP (vals, 0, i);
+             if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
+               elements[i] = copy_to_mode_reg (SImode, elements[i]);
+           }
+
+         emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
+                                       elements[2], elements[3]));
+         return;
+       }
      }
  
    /* With single precision floating point on VSX, know that internally single
       precision is actually represented as a double, and either make 2 V2DF
       vectors, and convert these vectors to single precision, or do one
       conversion, and splat the result to the other elements.  */
-  if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
+  if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
      {
        if (all_same)
         {
-         rtx op0 = XVECEXP (vals, 0, 0);
+         rtx element0 = XVECEXP (vals, 0, 0);
  
           if (TARGET_P9_VECTOR)
-           emit_insn (gen_vsx_splat_v4sf (target, op0));
+           {
+             if (MEM_P (element0))
+               element0 = rs6000_address_for_fpconvert (element0);
+
+             emit_insn (gen_vsx_splat_v4sf (target, element0));
+           }
  
           else
             {
               rtx freg = gen_reg_rtx (V4SFmode);
-             rtx sreg = force_reg (SFmode, op0);
+             rtx sreg = force_reg (SFmode, element0);
               rtx cvt  = (TARGET_XSCVDPSPN
                           ? gen_vsx_xscvdpspn_scalar (freg, sreg)
                           : gen_vsx_xscvdpsp_scalar (freg, sreg));
@@ -6793,6 +6830,32 @@ rs6000_expand_vector_init (rtx target, rtx vals)
        return;
      }
  
+  /* Special case initializing vector short/char that are splats if we are on
+     64-bit systems with direct move.  */
+  if (all_same && TARGET_DIRECT_MOVE_64BIT
+      && (mode == V16QImode || mode == V8HImode))
+    {
+      rtx op0 = XVECEXP (vals, 0, 0);
+      rtx di_tmp = gen_reg_rtx (DImode);
+
+      if (!REG_P (op0))
+       op0 = force_reg (GET_MODE_INNER (mode), op0);
+
+      if (mode == V16QImode)
+       {
+         emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
+         emit_insn (gen_vsx_vspltb_di (target, di_tmp));
+         return;
+       }
+
+      if (mode == V8HImode)
+       {
+         emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
+         emit_insn (gen_vsx_vsplth_di (target, di_tmp));
+         return;
+       }
+    }
+
    /* Store value to stack temp.  Load vector element.  Splat.  However, splat
       of 64-bit items is not supported on Altivec.  */
    if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
@@ -7032,6 +7095,18 @@ rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
    emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
  }
  
+/* Helper function to return the register number of a REG or SUBREG rtx.  */
+static inline int
+regno_or_subregno (rtx op)
+{
+  if (REG_P (op))
+    return REGNO (op);
+  else if (SUBREG_P (op))
+    return subreg_regno (op);
+  else
+    gcc_unreachable ();
+}
+
  /* Adjust a memory address (MEM) of a vector type to point to a scalar field
     within the vector (ELEMENT) with a mode (SCALAR_MODE).  Use a base register
     temporary (BASE_TMP) to fixup the address.  Return the new memory address
@@ -7111,14 +7186,22 @@ rs6000_adjust_vec_address (rtx scalar_reg,
         }
        else
         {
-         if (REG_P (op1) || SUBREG_P (op1))
+         bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
+         bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
+
+         /* Note, ADDI requires the register being added to be a base
+            register.  If the register was R0, load it up into the temporary
+            and do the add.  */
+         if (op1_reg_p
+             && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
             {
               insn = gen_add3_insn (base_tmp, op1, element_offset);
               gcc_assert (insn != NULL_RTX);
               emit_insn (insn);
             }
  
-         else if (REG_P (element_offset) || SUBREG_P (element_offset))
+         else if (ele_reg_p
+                  && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
             {
               insn = gen_add3_insn (base_tmp, element_offset, op1);
               gcc_assert (insn != NULL_RTX);
@@ -7147,14 +7230,7 @@ rs6000_adjust_vec_address (rtx scalar_reg,
      {
        rtx op1 = XEXP (new_addr, 1);
        addr_mask_type addr_mask;
-      int scalar_regno;
-
-      if (REG_P (scalar_reg))
-       scalar_regno = REGNO (scalar_reg);
-      else if (SUBREG_P (scalar_reg))
-       scalar_regno = subreg_regno (scalar_reg);
-      else
-       gcc_unreachable ();
+      int scalar_regno = regno_or_subregno (scalar_reg);
  
        gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
        if (INT_REGNO_P (scalar_regno))
@@ -7321,6 +7397,93 @@ rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
      gcc_unreachable ();
   }
  
+/* Helper function for rs6000_split_v4si_init to build up a DImode value from
+   two SImode values.  */
+
+static void
+rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
+{
+  const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
+
+  if (CONST_INT_P (si1) && CONST_INT_P (si2))
+    {
+      unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
+      unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
+
+      emit_move_insn (dest, GEN_INT (const1 | const2));
+      return;
+    }
+
+  /* Put si1 into upper 32-bits of dest.  */
+  if (CONST_INT_P (si1))
+    emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
+  else
+    {
+      /* Generate RLDIC.  */
+      rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
+      rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
+      rtx mask_rtx = GEN_INT (mask_32bit << 32);
+      rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
+      gcc_assert (!reg_overlap_mentioned_p (dest, si1));
+      emit_insn (gen_rtx_SET (dest, and_rtx));
+    }
+
+  /* Put si2 into the temporary.  */
+  gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
+  if (CONST_INT_P (si2))
+    emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
+  else
+    emit_insn (gen_zero_extendsidi2 (tmp, si2));
+
+  /* Combine the two parts.  */
+  emit_insn (gen_iordi3 (dest, dest, tmp));
+  return;
+}
+
+/* Split a V4SI initialization.  */
+
+void
+rs6000_split_v4si_init (rtx operands[])
+{
+  rtx dest = operands[0];
+
+  /* Destination is a GPR, build up the two DImode parts in place.  */
+  if (REG_P (dest) || SUBREG_P (dest))
+    {
+      int d_regno = regno_or_subregno (dest);
+      rtx scalar1 = operands[1];
+      rtx scalar2 = operands[2];
+      rtx scalar3 = operands[3];
+      rtx scalar4 = operands[4];
+      rtx tmp1 = operands[5];
+      rtx tmp2 = operands[6];
+
+      /* Even though we only need one temporary (plus the destination, which
+        has an early clobber constraint), try to use two temporaries, one for
+        each double word created.  That way the 2nd insn scheduling pass can
+        rearrange things so the two parts are done in parallel.  */
+      if (BYTES_BIG_ENDIAN)
+       {
+         rtx di_lo = gen_rtx_REG (DImode, d_regno);
+         rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
+         rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
+         rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
+       }
+      else
+       {
+         rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
+         rtx di_hi = gen_rtx_REG (DImode, d_regno);
+         gcc_assert (!VECTOR_ELT_ORDER_BIG);
+         rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
+         rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
+       }
+      return;
+    }
+
+  else
+    gcc_unreachable ();
+}
+
  /* Return TRUE if OP is an invalid SUBREG operation on the e500.  */
  
  bool
@@ -39054,6 +39217,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
           case UNSPEC_VSX_CVSPDPN:
           case UNSPEC_VSX_EXTRACT:
           case UNSPEC_VSX_VSLO:
+         case UNSPEC_VSX_VEC_INIT:
             return 0;
           case UNSPEC_VSPLT_DIRECT:
             *special = SH_SPLAT;
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h

index 353f388d770abbaf08a5cf1ccbca1252a382202c..1609abd68bc0da9da6a8a59e90fc5f5ec175ef23 100644 (file)
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -760,13 +760,15 @@ extern int rs6000_vector_align[];
                                  && TARGET_SINGLE_FLOAT                 \
                                  && TARGET_DOUBLE_FLOAT)
  
-/* Macro to say whether we can do optimization where we need to do parts of the
-   calculation in 64-bit GPRs and then is transfered to the vector
-   registers.  */
+/* Macro to say whether we can do optimizations where we need to do parts of
+   the calculation in 64-bit GPRs and then transfer it to the vector
+   registers.  Do not allow -maltivec=be for these optimizations, because it
+   adds to the complexity of the code.  */
  #define TARGET_DIRECT_MOVE_64BIT       (TARGET_DIRECT_MOVE             \
                                          && TARGET_P8_VECTOR            \
                                          && TARGET_POWERPC64            \
-                                        && TARGET_UPPER_REGS_DI)
+                                        && TARGET_UPPER_REGS_DI        \
+                                        && (rs6000_altivec_element_order != 2))
  
  /* Whether the various reciprocal divide/square root estimate instructions
     exist, and whether we should automatically generate code for the instruction
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md

index f64b4d8af20617de4132e05ebca4853a6ec30b48..60917c541c727afa7673640ca8fb7b7c95563338 100644 (file)
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -281,6 +281,16 @@
                           (V8HI  "v")
                           (V4SI  "wa")])
  
+;; Iterator for the 2 short vector types to do a splat from an integer
+(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
+
+;; Mode attribute to give the count for the splat instruction to splat
+;; the value in the 64-bit integer slot
+(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])
+
+;; Mode attribute to give the suffix for the splat instruction
+(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
+
  ;; Constants for creating unspecs
  (define_c_enum "unspec"
    [UNSPEC_VSX_CONCAT
@@ -323,6 +333,7 @@
     UNSPEC_VSX_VXSIG
     UNSPEC_VSX_VIEXP
     UNSPEC_VSX_VTSTDC
+   UNSPEC_VSX_VEC_INIT
    ])
  
  ;; VSX moves
@@ -1950,10 +1961,10 @@
  ;; together, relying on the fact that internally scalar floats are represented
  ;; as doubles.  This is used to initialize a V4SF vector with 4 floats
  (define_insn "vsx_concat_v2sf"
-  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
+  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
         (unspec:V2DF
-        [(match_operand:SF 1 "vsx_register_operand" "f,f")
-         (match_operand:SF 2 "vsx_register_operand" "f,f")]
+        [(match_operand:SF 1 "vsx_register_operand" "ww")
+         (match_operand:SF 2 "vsx_register_operand" "ww")]
          UNSPEC_VSX_CONCAT))]
    "VECTOR_MEM_VSX_P (V2DFmode)"
  {
@@ -1964,6 +1975,26 @@
  }
    [(set_attr "type" "vecperm")])
  
+;; V4SImode initialization splitter
+(define_insn_and_split "vsx_init_v4si"
+  [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
+       (unspec:V4SI
+        [(match_operand:SI 1 "reg_or_cint_operand" "rn")
+         (match_operand:SI 2 "reg_or_cint_operand" "rn")
+         (match_operand:SI 3 "reg_or_cint_operand" "rn")
+         (match_operand:SI 4 "reg_or_cint_operand" "rn")]
+        UNSPEC_VSX_VEC_INIT))
+   (clobber (match_scratch:DI 5 "=&r"))
+   (clobber (match_scratch:DI 6 "=&r"))]
+   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+   "#"
+   "&& reload_completed"
+   [(const_int 0)]
+{
+  rs6000_split_v4si_init (operands);
+  DONE;
+})
+
  ;; xxpermdi for little endian loads and stores.  We need several of
  ;; these since the form of the PARALLEL differs by mode.
  (define_insn "*vsx_xxpermdi2_le_<mode>"
@@ -2674,32 +2705,33 @@
     mtvsrdd %x0,%1,%1"
    [(set_attr "type" "vecperm,vecload,vecperm")])
  
-;; V4SI splat (ISA 3.0)
-;; When SI's are allowed in VSX registers, add XXSPLTW support
-(define_expand "vsx_splat_<mode>"
-  [(set (match_operand:VSX_W 0 "vsx_register_operand" "")
-       (vec_duplicate:VSX_W
-        (match_operand:<VS_scalar> 1 "splat_input_operand" "")))]
-  "TARGET_P9_VECTOR"
-{
-  if (MEM_P (operands[1]))
-    operands[1] = rs6000_address_for_fpconvert (operands[1]);
-  else if (!REG_P (operands[1]))
-    operands[1] = force_reg (<VS_scalar>mode, operands[1]);
-})
-
-(define_insn "*vsx_splat_v4si_internal"
-  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
+;; V4SI splat support
+(define_insn "vsx_splat_v4si"
+  [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
         (vec_duplicate:V4SI
          (match_operand:SI 1 "splat_input_operand" "r,Z")))]
    "TARGET_P9_VECTOR"
    "@
     mtvsrws %x0,%1
     lxvwsx %x0,%y1"
-  [(set_attr "type" "mftgpr,vecload")])
+  [(set_attr "type" "vecperm,vecload")])
+
+;; SImode is not currently allowed in vector registers.  This pattern
+;; allows us to use direct move to get the value in a vector register
+;; so that we can use XXSPLTW
+(define_insn "vsx_splat_v4si_di"
+  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
+       (vec_duplicate:V4SI
+        (truncate:SI
+         (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))]
+  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+  "@
+   xxspltw %x0,%x1,1
+   mtvsrws %x0,%1"
+  [(set_attr "type" "vecperm")])
  
  ;; V4SF splat (ISA 3.0)
-(define_insn_and_split "*vsx_splat_v4sf_internal"
+(define_insn_and_split "vsx_splat_v4sf"
    [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
         (vec_duplicate:V4SF
          (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
@@ -2720,12 +2752,12 @@
  
  ;; V4SF/V4SI splat from a vector element
  (define_insn "vsx_xxspltw_<mode>"
-  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
+  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
         (vec_duplicate:VSX_W
          (vec_select:<VS_scalar>
-         (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
+         (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
           (parallel
-          [(match_operand:QI 2 "u5bit_cint_operand" "i,i")]))))]
+          [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
    "VECTOR_MEM_VSX_P (<MODE>mode)"
  {
    if (!BYTES_BIG_ENDIAN)
@@ -2736,14 +2768,24 @@
    [(set_attr "type" "vecperm")])
  
  (define_insn "vsx_xxspltw_<mode>_direct"
-  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
-        (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
-                       (match_operand:QI 2 "u5bit_cint_operand" "i,i")]
+  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
+        (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
+                       (match_operand:QI 2 "u5bit_cint_operand" "i")]
                        UNSPEC_VSX_XXSPLTW))]
    "VECTOR_MEM_VSX_P (<MODE>mode)"
    "xxspltw %x0,%x1,%2"
    [(set_attr "type" "vecperm")])
  
+;; V16QI/V8HI splat support on ISA 2.07
+(define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
+  [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
+       (vec_duplicate:VSX_SPLAT_I
+        (truncate:<VS_scalar>
+         (match_operand:DI 1 "altivec_register_operand" "v"))))]
+  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
+  "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
+  [(set_attr "type" "vecperm")])
+
  ;; V2DF/V2DI splat for use by vec_splat builtin
  (define_insn "vsx_xxspltd_<mode>"
    [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 4c45e35f50b1ced7beaad91e9234b639b2850af6..2812daad28f21b4af2d778a6bcc99e590fb86fb8 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2016-08-23  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+       * gcc.target/powerpc/vec-init-1.c: Add tests where the vector is
+       being created from pointers to memory locations.
+       * gcc.target/powerpc/vec-init-2.c: Likewise.
+
  2016-08-23  Fritz Reese  <fritzoreese@gmail.com>
  
         * gfortran.dg/dec_structure_12.f90: New testcase.
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-init-1.c b/gcc/testsuite/gcc.target/powerpc/vec-init-1.c

index 753330af49eb5d1c610738606d334a2182fec49f..079e6d843b48ba6c7f03112650bbdaa392f01b13 100644 (file)
--- a/gcc/testsuite/gcc.target/powerpc/vec-init-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-init-1.c
@@ -24,6 +24,9 @@ extern void check_splat (vector int a)
  extern vector int pack_reg (int a, int b, int c, int d)
    __attribute__((__noinline__));
  
+extern vector int pack_from_ptr (int *p_a, int *p_b, int *p_c, int *p_d)
+  __attribute__((__noinline__));
+
  extern vector int pack_const (void)
    __attribute__((__noinline__));
  
@@ -39,6 +42,9 @@ extern void pack_global (int a, int b, int c, int d)
  extern vector int splat_reg (int a)
    __attribute__((__noinline__));
  
+extern vector int splat_from_ptr (int *p)
+  __attribute__((__noinline__));
+
  extern vector int splat_const (void)
    __attribute__((__noinline__));
  
@@ -77,6 +83,12 @@ pack_reg (int a, int b, int c, int d)
    return (vector int) { a, b, c, d };
  }
  
+vector int
+pack_from_ptr (int *p_a, int *p_b, int *p_c, int *p_d)
+{
+  return (vector int) { *p_a, *p_b, *p_c, *p_d };
+}
+
  vector int
  pack_const (void)
  {
@@ -107,6 +119,12 @@ splat_reg (int a)
    return (vector int) { a, a, a, a };
  }
  
+vector int
+splat_from_ptr (int *p)
+{
+  return (vector int) { *p, *p, *p, *p };
+}
+
  vector int
  splat_const (void)
  {
@@ -134,11 +152,15 @@ splat_global (int a)
  int main (void)
  {
    vector int sv2, sv3;
+  int mem = SPLAT;
+  int mem2[4] = { ELEMENTS };
  
    check (sv);
  
    check (pack_reg (ELEMENTS));
  
+  check (pack_from_ptr (&mem2[0], &mem2[1], &mem2[2], &mem2[3]));
+
    check (pack_const ());
  
    pack_ptr (&sv2, ELEMENTS);
@@ -154,6 +176,8 @@ int main (void)
  
    check_splat (splat_reg (SPLAT));
  
+  check_splat (splat_from_ptr (&mem));
+
    check_splat (splat_const ());
  
    splat_ptr (&sv2, SPLAT);
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-init-2.c b/gcc/testsuite/gcc.target/powerpc/vec-init-2.c

index a9fe6f44e410bb75b687ffa56b79ea6f7001a907..ddd41e280ee02395efe96f843a16267239ea39c4 100644 (file)
--- a/gcc/testsuite/gcc.target/powerpc/vec-init-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-init-2.c
@@ -24,6 +24,9 @@ extern void check_splat (vector long a)
  extern vector long pack_reg (long a, long b)
    __attribute__((__noinline__));
  
+extern vector long pack_from_ptr (long *p_a, long *p_b)
+  __attribute__((__noinline__));
+
  extern vector long pack_const (void)
    __attribute__((__noinline__));
  
@@ -39,6 +42,9 @@ extern void pack_global (long a, long b)
  extern vector long splat_reg (long a)
    __attribute__((__noinline__));
  
+extern vector long splat_from_ptr (long *p)
+  __attribute__((__noinline__));
+
  extern vector long splat_const (void)
    __attribute__((__noinline__));
  
@@ -77,6 +83,12 @@ pack_reg (long a, long b)
    return (vector long) { a, b };
  }
  
+vector long
+pack_from_ptr (long *p_a, long *p_b)
+{
+  return (vector long) { *p_a, *p_b };
+}
+
  vector long
  pack_const (void)
  {
@@ -107,6 +119,12 @@ splat_reg (long a)
    return (vector long) { a, a };
  }
  
+vector long
+splat_from_ptr (long *p)
+{
+  return (vector long) { *p, *p };
+}
+
  vector long
  splat_const (void)
  {
@@ -134,11 +152,15 @@ splat_global (long a)
  int  main (void)
  {
    vector long sv2, sv3;
+  long mem = SPLAT;
+  long mem2[2] = { ELEMENTS };
  
    check (sv);
  
    check (pack_reg (ELEMENTS));
  
+  check (pack_from_ptr (&mem2[0], &mem2[1]));
+
    check (pack_const ());
  
    pack_ptr (&sv2, ELEMENTS);
@@ -154,6 +176,8 @@ int  main (void)
  
    check_splat (splat_reg (SPLAT));
  
+  check_splat (splat_from_ptr (&mem));
+
    check_splat (splat_const ());
  
    splat_ptr (&sv2, SPLAT);
author	Michael Meissner <meissner@linux.vnet.ibm.com>
	Tue, 23 Aug 2016 20:41:32 +0000 (20:41 +0000)
committer	Michael Meissner <meissner@gcc.gnu.org>
	Tue, 23 Aug 2016 20:41:32 +0000 (20:41 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config/rs6000/rs6000-protos.h		patch \| blob \| history
gcc/config/rs6000/rs6000.c		patch \| blob \| history
gcc/config/rs6000/rs6000.h		patch \| blob \| history
gcc/config/rs6000/vsx.md		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.target/powerpc/vec-init-1.c		patch \| blob \| history
gcc/testsuite/gcc.target/powerpc/vec-init-2.c		patch \| blob \| history