rs6000-protos.h (rs6000_split_vec_extract_var): New declaration.
author Michael Meissner <meissner@linux.vnet.ibm.com>
Thu, 28 Jul 2016 21:02:06 +0000 (21:02 +0000)
committer Michael Meissner <meissner@gcc.gnu.org>
Thu, 28 Jul 2016 21:02:06 +0000 (21:02 +0000)
[gcc]
2016-07-28  Michael Meissner  <meissner@linux.vnet.ibm.com>

* config/rs6000/rs6000-protos.h (rs6000_split_vec_extract_var):
New declaration.
* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
Add support for vec_extract of vector double or vector long having
a variable element number on 64-bit ISA 2.07 systems or newer.
* config/rs6000/rs6000.c (rs6000_expand_vector_extract):
Likewise.
(rs6000_split_vec_extract_var): New function to split a
vec_extract built-in function with variable element number.
(rtx_is_swappable_p): Variable vec_extracts and shifts are not
swappable.
* config/rs6000/vsx.md (UNSPEC_VSX_VSLO): New unspec.
(UNSPEC_VSX_EXTRACT): Likewise.
(vsx_extract_<mode>, VSX_D iterator): Fix constraints to allow
direct move instructions to be generated on 64-bit ISA 2.07
systems and newer, and to take advantage of the ISA 3.0 MFVSRLD
instruction.
(vsx_vslo_<mode>): New insn to do VSLO on V2DFmode and V2DImode
arguments for vec_extract variable element.
(vsx_extract_<mode>_var, VSX_D iterator): New insn to support
vec_extract with variable element on V2DFmode and V2DImode
vectors.
* config/rs6000/rs6000.h (TARGET_VEXTRACTUB): Remove
-mupper-regs-df requirement, since it isn't needed.
(TARGET_DIRECT_MOVE_64BIT): New macro to say whether we can
do direct moves on 64-bit systems, which allows optimization of
vec_extract on 64-bit ISA 2.07 systems and newer.

[gcc/testsuite]
2016-07-28  Michael Meissner  <meissner@linux.vnet.ibm.com>

* gcc.target/powerpc/vec-extract-1.c: New test.

From-SVN: r238838

gcc/ChangeLog
gcc/config/rs6000/rs6000-c.c
gcc/config/rs6000/rs6000-protos.h
gcc/config/rs6000/rs6000.c
gcc/config/rs6000/rs6000.h
gcc/config/rs6000/vsx.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/powerpc/vec-extract-1.c [new file with mode: 0644]

index cad90e6c92fe7209ce02f4d068d2da175d63aabb..e51cc011e53e4d395d5ba34774f00872593b5dde 100644 (file)
@@ -1,3 +1,33 @@
+2016-07-28  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+       * config/rs6000/rs6000-protos.h (rs6000_split_vec_extract_var):
+       New declaration.
+       * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
+       Add support for vec_extract of vector double or vector long having
+       a variable element number on 64-bit ISA 2.07 systems or newer.
+       * config/rs6000/rs6000.c (rs6000_expand_vector_extract):
+       Likewise.
+       (rs6000_split_vec_extract_var): New function to split a
+       vec_extract built-in function with variable element number.
+       (rtx_is_swappable_p): Variable vec_extracts and shifts are not
+       swappable.
+       * config/rs6000/vsx.md (UNSPEC_VSX_VSLO): New unspec.
+       (UNSPEC_VSX_EXTRACT): Likewise.
+       (vsx_extract_<mode>, VSX_D iterator): Fix constraints to allow
+       direct move instructions to be generated on 64-bit ISA 2.07
+       systems and newer, and to take advantage of the ISA 3.0 MFVSRLD
+       instruction.
+       (vsx_vslo_<mode>): New insn to do VSLO on V2DFmode and V2DImode
+       arguments for vec_extract variable element.
+       (vsx_extract_<mode>_var, VSX_D iterator): New insn to support
+       vec_extract with variable element on V2DFmode and V2DImode
+       vectors.
+       * config/rs6000/rs6000.h (TARGET_VEXTRACTUB): Remove
+       -mupper-regs-df requirement, since it isn't needed.
+       (TARGET_DIRECT_MOVE_64BIT): New macro to say whether we can
+       do direct moves on 64-bit systems, which allows optimization of
+       vec_extract on 64-bit ISA 2.07 systems and newer.
+
 2016-07-28  Kristina Martsenko  <kristina.martsenko@arm.com>
 2016-07-28  Wilco Dijkstra  <wdijkstr@arm.com>
 
index b8ca0e9d3bc52dea176cab0e0e4145f94322224c..2a60262f034a24482e86f4fbaa27fa69abba937c 100644 (file)
@@ -5105,29 +5105,61 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
                                  arg2);
        }
 
-      /* If we can use the VSX xxpermdi instruction, use that for extract.  */
+      /* See if we can optimize vec_extracts with the current VSX instruction
+        set.  */
       mode = TYPE_MODE (arg1_type);
-      if ((mode == V2DFmode || mode == V2DImode) && VECTOR_MEM_VSX_P (mode)
-         && TREE_CODE (arg2) == INTEGER_CST
-         && wi::ltu_p (arg2, 2))
+      if (VECTOR_MEM_VSX_P (mode))
+
        {
          tree call = NULL_TREE;
+         int nunits = GET_MODE_NUNITS (mode);
 
-         if (mode == V2DFmode)
-           call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DF];
-         else if (mode == V2DImode)
-           call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI];
+         /* If the second argument is an integer constant, if the value is in
+            the expected range, generate the built-in code if we can.  We need
+            64-bit and direct move to extract the small integer vectors.  */
+         if (TREE_CODE (arg2) == INTEGER_CST && wi::ltu_p (arg2, nunits))
+           {
+             switch (mode)
+               {
+               default:
+                 break;
+
+               case V1TImode:
+                 call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V1TI];
+                 break;
+
+               case V2DFmode:
+                 call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DF];
+                 break;
+
+               case V2DImode:
+                 call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI];
+                 break;
+               }
+           }
+
+         /* If the second argument is variable, we can optimize it if we are
+            generating 64-bit code on a machine with direct move.  */
+         else if (TREE_CODE (arg2) != INTEGER_CST && TARGET_DIRECT_MOVE_64BIT)
+           {
+             switch (mode)
+               {
+               default:
+                 break;
+
+               case V2DFmode:
+                 call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DF];
+                 break;
+
+               case V2DImode:
+                 call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI];
+                 break;
+               }
+           }
 
          if (call)
            return build_call_expr (call, 2, arg1, arg2);
        }
-      else if (mode == V1TImode && VECTOR_MEM_VSX_P (mode)
-              && TREE_CODE (arg2) == INTEGER_CST
-              && wi::eq_p (arg2, 0))
-       {
-         tree call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V1TI];
-         return build_call_expr (call, 2, arg1, arg2);
-       }
 
       /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2). */
       arg1_inner_type = TREE_TYPE (arg1_type);
index fefe7049804a538c17722881a8a5de65bc4a4411..fdf5c6885a7de20162adf1e69fad3199886cfee8 100644 (file)
@@ -62,6 +62,7 @@ extern void rs6000_expand_vector_init (rtx, rtx);
 extern void paired_expand_vector_init (rtx, rtx);
 extern void rs6000_expand_vector_set (rtx, rtx, int);
 extern void rs6000_expand_vector_extract (rtx, rtx, rtx);
+extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx);
 extern bool altivec_expand_vec_perm_const (rtx op[4]);
 extern void altivec_expand_vec_perm_le (rtx op[4]);
 extern bool rs6000_expand_vec_perm_const (rtx op[4]);
index e5d8ad0719867d2c7254c7ac61fd4d91bcd48163..e9447f7a8b9b1831909ad10aa2eb73b262e55d80 100644 (file)
@@ -6959,8 +6959,31 @@ rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
              emit_insn (gen_vsx_extract_v4si (target, vec, elt));
              return;
            }
-         else
-           break;
+         break;
+       }
+    }
+  else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
+          && TARGET_DIRECT_MOVE_64BIT)
+    {
+      if (GET_MODE (elt) != DImode)
+       {
+         rtx tmp = gen_reg_rtx (DImode);
+         convert_move (tmp, elt, 0);
+         elt = tmp;
+       }
+
+      switch (mode)
+       {
+       case V2DFmode:
+         emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
+         return;
+
+       case V2DImode:
+         emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
+         return;
+
+       default:
+         gcc_unreachable ();
        }
     }
 
@@ -6978,6 +7001,99 @@ rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
   emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
 }
 
+/* Split a variable vec_extract operation into the component instructions.
+   DEST receives the extracted scalar, SRC is the vector being indexed, and
+   ELEMENT holds the run-time element number (it is used with DImode
+   arithmetic below).  TMP_GPR and TMP_ALTIVEC are scratch registers; both
+   must be REGs.  TMP_GPR is used to build the shift count and TMP_ALTIVEC
+   carries that count into a vector register for the VSLO.
+
+   Only a SRC that is a REG or SUBREG is handled, and only V2DFmode and
+   V2DImode vectors have a final VSLO pattern; any other input reaches
+   gcc_unreachable.  The size of DEST's mode must be a power of 2.  */
+
+void
+rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
+                             rtx tmp_altivec)
+{
+  machine_mode mode = GET_MODE (src);
+  machine_mode scalar_mode = GET_MODE (dest);
+  unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
+  int byte_shift = exact_log2 (scalar_size);
+
+  gcc_assert (byte_shift >= 0);
+
+  if (REG_P (src) || SUBREG_P (src))
+    {
+      int bit_shift = byte_shift + 3;
+      rtx element2;
+
+      gcc_assert (REG_P (tmp_gpr) && REG_P (tmp_altivec));
+
+      /* For little endian, adjust element ordering.  For V2DI/V2DF, we can use
+        an XOR, otherwise we need to subtract.  The shift amount is so VSLO
+        will shift the element into the upper position (adding 3 to convert a
+        byte shift into a bit shift).  In both arms the adjusted element
+        number ends up in ELEMENT2, which is TMP_GPR when an adjustment was
+        emitted and ELEMENT itself otherwise.  */
+      if (scalar_size == 8)
+       {
+         if (!VECTOR_ELT_ORDER_BIG)
+           {
+             emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
+             element2 = tmp_gpr;
+           }
+         else
+           element2 = element;
+
+         /* Generate RLDIC directly to shift left 6 bits and retrieve 1
+            bit.  The RTL is (element2 << 6) & 64, so TMP_GPR ends up holding
+            either 0 or 64.  */
+         emit_insn (gen_rtx_SET (tmp_gpr,
+                                 gen_rtx_AND (DImode,
+                                              gen_rtx_ASHIFT (DImode,
+                                                              element2,
+                                                              GEN_INT (6)),
+                                              GEN_INT (64))));
+       }
+      else
+       {
+         if (!VECTOR_ELT_ORDER_BIG)
+           {
+             rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
+
+             emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
+             emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
+             element2 = tmp_gpr;
+           }
+         else
+           element2 = element;
+
+         emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
+       }
+
+      /* Get the value into the lower byte of the Altivec register where VSLO
+        expects it.  With TARGET_P9_VECTOR the GPR is splatted directly.
+        Otherwise the value is concatenated with itself; when no new pseudos
+        can be created it is first copied into the DImode view of
+        TMP_ALTIVEC's own register and that copy is concatenated.  */
+      if (TARGET_P9_VECTOR)
+       emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
+      else if (can_create_pseudo_p ())
+       emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
+      else
+       {
+         rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
+         emit_move_insn (tmp_di, tmp_gpr);
+         emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
+       }
+
+      /* Do the VSLO to get the value into the final location.  Only V2DFmode
+        and V2DImode are handled here.  */
+      switch (mode)
+       {
+       case V2DFmode:
+         emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
+         return;
+
+       case V2DImode:
+         emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
+         return;
+
+       default:
+         gcc_unreachable ();
+       }
+
+      return;
+    }
+  else
+    gcc_unreachable ();
+ }
+
 /* Return TRUE if OP is an invalid SUBREG operation on the e500.  */
 
 bool
@@ -38640,6 +38756,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
          case UNSPEC_VSX_CVDPSPN:
          case UNSPEC_VSX_CVSPDP:
          case UNSPEC_VSX_CVSPDPN:
+         case UNSPEC_VSX_EXTRACT:
            return 0;
          case UNSPEC_VSPLT_DIRECT:
            *special = SH_SPLAT;
index a2b16d9c73b30a1531fecfb8af85f66fe03de89f..9b3e83bcd30db83c2ec7c67c1b5f7cbcf37d5932 100644 (file)
@@ -602,7 +602,6 @@ extern int rs6000_vector_align[];
 #define TARGET_DIRECT_MOVE_128 (TARGET_P9_VECTOR && TARGET_DIRECT_MOVE \
                                 && TARGET_POWERPC64)
 #define TARGET_VEXTRACTUB      (TARGET_P9_VECTOR && TARGET_DIRECT_MOVE \
-                                && TARGET_UPPER_REGS_DF \
                                 && TARGET_UPPER_REGS_DI && TARGET_POWERPC64)
 
 /* Byte/char syncs were added as phased in for ISA 2.06B, but are not present
@@ -761,6 +760,14 @@ extern int rs6000_vector_align[];
                                 && TARGET_SINGLE_FLOAT                 \
                                 && TARGET_DOUBLE_FLOAT)
 
+/* Macro to say whether we can do optimizations where parts of the calculation
+   are done in 64-bit GPRs and the result is then transferred to the vector
+   registers.  */
+#define TARGET_DIRECT_MOVE_64BIT       (TARGET_DIRECT_MOVE             \
+                                        && TARGET_P8_VECTOR            \
+                                        && TARGET_POWERPC64            \
+                                        && TARGET_UPPER_REGS_DI)
+
 /* Whether the various reciprocal divide/square root estimate instructions
    exist, and whether we should automatically generate code for the instruction
    by default.  */
index 1fddebcf5c3f72f3f9b1fc4eca03b3c5c60d37b9..ca569a2cbf1f4c096a894884b0ff0156c265b139 100644 (file)
    UNSPEC_VSX_XVCVDPUXDS
    UNSPEC_VSX_SIGN_EXTEND
    UNSPEC_P9_MEMORY
+   UNSPEC_VSX_VSLO
+   UNSPEC_VSX_EXTRACT
   ])
 
 ;; VSX moves
 ;; register was picked.  Limit the scalar value to FPRs for now.
 
 (define_insn "vsx_extract_<mode>"
-  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand"
-            "=d,     wm,      wo,    d")
+  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d,    d,     wr, wr")
 
        (vec_select:<VS_scalar>
-        (match_operand:VSX_D 1 "gpc_reg_operand"
-            "<VSa>, <VSa>,  <VSa>,  <VSa>")
+        (match_operand:VSX_D 1 "gpc_reg_operand"      "<VSa>, <VSa>, wm, wo")
 
         (parallel
-         [(match_operand:QI 2 "const_0_to_1_operand"
-            "wD,    wD,     wL,     n")])))]
+         [(match_operand:QI 2 "const_0_to_1_operand"  "wD,    n,     wD, n")])))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
 {
   int element = INTVAL (operands[2]);
   [(set_attr "type" "fpstore")
    (set_attr "length" "4")])
 
+;; Variable V2DI/V2DF extract shift
+(define_insn "vsx_vslo_<mode>"
+  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
+       (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
+                            (match_operand:V2DI 2 "gpc_reg_operand" "v")]
+                           UNSPEC_VSX_VSLO))]
+  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
+  "vslo %0,%1,%2"
+  [(set_attr "type" "vecperm")])
+
+;; Variable V2DI/V2DF extract
+(define_insn_and_split "vsx_extract_<mode>_var"
+  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
+       (unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v")
+                            (match_operand:DI 2 "gpc_reg_operand" "r")]
+                           UNSPEC_VSX_EXTRACT))
+   (clobber (match_scratch:DI 3 "=r"))
+   (clobber (match_scratch:V2DI 4 "=&v"))]
+  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
+                               operands[3], operands[4]);
+  DONE;
+})
+
 ;; Extract a SF element from V4SF
 (define_insn_and_split "vsx_extract_v4sf"
   [(set (match_operand:SF 0 "vsx_register_operand" "=f,f")
index fc764c97e974ae671befbf9bd69355e50f2ef1a6..239cf67c12dc91aa3590acd00d59015d975c178b 100644 (file)
@@ -1,3 +1,7 @@
+2016-07-28  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+       * gcc.target/powerpc/vec-extract-1.c: New test.
+
 2016-07-28  Steven G. Kargl  <kargl@gcc.gnu.org>
 
        PR fortran/71799
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-extract-1.c b/gcc/testsuite/gcc.target/powerpc/vec-extract-1.c
new file mode 100644 (file)
index 0000000..ef34e2b
--- /dev/null
@@ -0,0 +1,27 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mcpu=power8 -O2 -mupper-regs-df -mupper-regs-di" } */
+
+#include <altivec.h>
+/* Extract element N of vector double A, with N not a constant, and add 1.0.  */
+double
+add_double (vector double a, int n)
+{
+  return vec_extract (a, n) + 1.0;
+}
+/* Extract element N of vector long A, with N not a constant, and add 1.  */
+long
+add_long (vector long a, int n)
+{
+  return vec_extract (a, n) + 1;
+}
+
+/* { dg-final { scan-assembler     "vslo"    } } */
+/* { dg-final { scan-assembler     "mtvsrd"  } } */
+/* { dg-final { scan-assembler     "mfvsrd"  } } */
+/* { dg-final { scan-assembler-not "stxvd2x" } } */
+/* { dg-final { scan-assembler-not "stxvx"   } } */
+/* { dg-final { scan-assembler-not "stxv"    } } */
+/* { dg-final { scan-assembler-not "ldx"     } } */