rs6000.c (special_handling_values): Add SH_SPLAT.

author Bill Schmidt <wschmidt@linux.vnet.ibm.com>

Mon, 8 Sep 2014 18:40:56 +0000 (18:40 +0000)

committer William Schmidt <wschmidt@gcc.gnu.org>

Mon, 8 Sep 2014 18:40:56 +0000 (18:40 +0000)
author Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Mon, 8 Sep 2014 18:40:56 +0000 (18:40 +0000)
committer William Schmidt <wschmidt@gcc.gnu.org>
Mon, 8 Sep 2014 18:40:56 +0000 (18:40 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 6137b1a4b6792755801809da9780ecb41d1aac42..668d742e1ba7374fbaf9e4c58883b67224267ac4 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2014-09-08  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
+
+       * config/rs6000/rs6000.c (special_handling_values): Add SH_SPLAT.
+       (rtx_is_swappable_p): Convert UNSPEC cascading ||s to a switch
+       statement; allow optimization of UNSPEC_VSPLT_DIRECT with special
+       handling SH_SPLAT.
+       (adjust_extract): Fix test for VEC_DUPLICATE case; fix adjustment
+       of extracted lane.
+       (adjust_splat): New function.
+       (handle_special_swappables): Call adjust_splat for SH_SPLAT.
+       (dump_swap_insn_table): Add case for SH_SPLAT.
+
  2014-09-08  Richard Biener  <rguenther@suse.de>
  
         PR ipa/63196
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c

index 6370304b28731f439b2b8b7710d079535ddaf404..fd9198664402bf45492309189391333c4d20ca02 100644 (file)
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -33524,7 +33524,8 @@ enum special_handling_values {
    SH_SUBREG,
    SH_NOSWAP_LD,
    SH_NOSWAP_ST,
-  SH_EXTRACT
+  SH_EXTRACT,
+  SH_SPLAT
  };
  
  /* Union INSN with all insns containing definitions that reach USE.
@@ -33735,43 +33736,50 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
            vector splat are element-order sensitive.  A few of these
            cases might be workable with special handling if required.  */
         int val = XINT (op, 1);
-       if (val == UNSPEC_VMRGH_DIRECT
-           || val == UNSPEC_VMRGL_DIRECT
-           || val == UNSPEC_VPACK_SIGN_SIGN_SAT
-           || val == UNSPEC_VPACK_SIGN_UNS_SAT
-           || val == UNSPEC_VPACK_UNS_UNS_MOD
-           || val == UNSPEC_VPACK_UNS_UNS_MOD_DIRECT
-           || val == UNSPEC_VPACK_UNS_UNS_SAT
-           || val == UNSPEC_VPERM
-           || val == UNSPEC_VPERM_UNS
-           || val == UNSPEC_VPERMHI
-           || val == UNSPEC_VPERMSI
-           || val == UNSPEC_VPKPX
-           || val == UNSPEC_VSLDOI
-           || val == UNSPEC_VSLO
-           || val == UNSPEC_VSPLT_DIRECT
-           || val == UNSPEC_VSRO
-           || val == UNSPEC_VSUM2SWS
-           || val == UNSPEC_VSUM4S
-           || val == UNSPEC_VSUM4UBS
-           || val == UNSPEC_VSUMSWS
-           || val == UNSPEC_VSUMSWS_DIRECT
-           || val == UNSPEC_VSX_CONCAT
-           || val == UNSPEC_VSX_CVSPDP
-           || val == UNSPEC_VSX_CVSPDPN
-           || val == UNSPEC_VSX_SET
-           || val == UNSPEC_VSX_SLDWI
-           || val == UNSPEC_VUNPACK_HI_SIGN
-           || val == UNSPEC_VUNPACK_HI_SIGN_DIRECT
-           || val == UNSPEC_VUNPACK_LO_SIGN
-           || val == UNSPEC_VUNPACK_LO_SIGN_DIRECT
-           || val == UNSPEC_VUPKHPX
-           || val == UNSPEC_VUPKHS_V4SF
-           || val == UNSPEC_VUPKHU_V4SF
-           || val == UNSPEC_VUPKLPX
-           || val == UNSPEC_VUPKLS_V4SF
-           || val == UNSPEC_VUPKHU_V4SF)
-         return 0;
+       switch (val)
+         {
+         default:
+           break;
+         case UNSPEC_VMRGH_DIRECT:
+         case UNSPEC_VMRGL_DIRECT:
+         case UNSPEC_VPACK_SIGN_SIGN_SAT:
+         case UNSPEC_VPACK_SIGN_UNS_SAT:
+         case UNSPEC_VPACK_UNS_UNS_MOD:
+         case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
+         case UNSPEC_VPACK_UNS_UNS_SAT:
+         case UNSPEC_VPERM:
+         case UNSPEC_VPERM_UNS:
+         case UNSPEC_VPERMHI:
+         case UNSPEC_VPERMSI:
+         case UNSPEC_VPKPX:
+         case UNSPEC_VSLDOI:
+         case UNSPEC_VSLO:
+         case UNSPEC_VSRO:
+         case UNSPEC_VSUM2SWS:
+         case UNSPEC_VSUM4S:
+         case UNSPEC_VSUM4UBS:
+         case UNSPEC_VSUMSWS:
+         case UNSPEC_VSUMSWS_DIRECT:
+         case UNSPEC_VSX_CONCAT:
+         case UNSPEC_VSX_CVSPDP:
+         case UNSPEC_VSX_CVSPDPN:
+         case UNSPEC_VSX_SET:
+         case UNSPEC_VSX_SLDWI:
+         case UNSPEC_VUNPACK_HI_SIGN:
+         case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
+         case UNSPEC_VUNPACK_LO_SIGN:
+         case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
+         case UNSPEC_VUPKHPX:
+         case UNSPEC_VUPKHS_V4SF:
+         case UNSPEC_VUPKHU_V4SF:
+         case UNSPEC_VUPKLPX:
+         case UNSPEC_VUPKLS_V4SF:
+         case UNSPEC_VUPKLU_V4SF:
+           return 0;
+         case UNSPEC_VSPLT_DIRECT:
+           *special = SH_SPLAT;
+           return 1;
+         }
        }
  
      default:
@@ -34098,20 +34106,20 @@ permute_store (rtx_insn *insn)
              INSN_UID (insn));
  }
  
-/* Given OP that contains a vector extract operation, change the index
-   of the extracted lane to count from the other side of the vector.  */
+/* Given INSN that contains a vector extract operation, adjust the index
+   of the extracted lane to account for the doubleword swap.  */
  static void
  adjust_extract (rtx_insn *insn)
  {
-  rtx body = PATTERN (insn);
+  rtx src = SET_SRC (PATTERN (insn));
    /* The vec_select may be wrapped in a vec_duplicate for a splat, so
       account for that.  */
-  rtx sel = (GET_CODE (body) == VEC_DUPLICATE
-            ? XEXP (XEXP (body, 0), 1)
-            : XEXP (body, 1));
+  rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
    rtx par = XEXP (sel, 1);
-  int nunits = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0)));
-  XVECEXP (par, 0, 0) = GEN_INT (nunits - 1 - INTVAL (XVECEXP (par, 0, 0)));
+  int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
+  int lane = INTVAL (XVECEXP (par, 0, 0));
+  lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
+  XVECEXP (par, 0, 0) = GEN_INT (lane);
    INSN_CODE (insn) = -1; /* Force re-recognition.  */
    df_insn_rescan (insn);
  
@@ -34119,6 +34127,24 @@ adjust_extract (rtx_insn *insn)
      fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
  }
  
+/* Given INSN that contains a vector direct-splat operation, adjust the index
+   of the source lane to account for the doubleword swap.  */
+static void
+adjust_splat (rtx_insn *insn)
+{
+  rtx body = PATTERN (insn);
+  rtx unspec = XEXP (body, 1);
+  int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
+  int lane = INTVAL (XVECEXP (unspec, 0, 1));
+  lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
+  XVECEXP (unspec, 0, 1) = GEN_INT (lane);
+  INSN_CODE (insn) = -1; /* Force re-recognition.  */
+  df_insn_rescan (insn);
+
+  if (dump_file)
+    fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
+}
+
  /* The insn described by INSN_ENTRY[I] can be swapped, but only
     with special handling.  Take care of that here.  */
  static void
@@ -34160,6 +34186,11 @@ handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
      case SH_EXTRACT:
        /* Change the lane on an extract operation.  */
        adjust_extract (insn);
+      break;
+    case SH_SPLAT:
+      /* Change the lane on a direct-splat operation.  */
+      adjust_splat (insn);
+      break;
      }
  }
  
@@ -34230,6 +34261,8 @@ dump_swap_insn_table (swap_web_entry *insn_entry)
               fputs ("special:store ", dump_file);
             else if (insn_entry[i].special_handling == SH_EXTRACT)
               fputs ("special:extract ", dump_file);
+           else if (insn_entry[i].special_handling == SH_SPLAT)
+             fputs ("special:splat ", dump_file);
           }
         if (insn_entry[i].web_not_optimizable)
           fputs ("unoptimizable ", dump_file);
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 084c0d104556d23fbcfed9d6e80b52df7e327342..cd4650c610acd458ad8965d9e0dd41b5460009c2 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2014-09-08  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
+
+       * gcc.target/powerpc/swaps-p8-16.c: New test.
+
  2014-09-07  Richard Sandiford  <rdsandiford@googlemail.com>
  
         * gcc.target/i386/pr62208.c: New test.
diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-16.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-16.c

new file mode 100644 (file)

index 0000000..b57b1ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-16.c
@@ -0,0 +1,56 @@
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-options "-mcpu=power8 -O3" } */
+/* { dg-final { scan-assembler "lxvd2x" } } */
+/* { dg-final { scan-assembler "stxvd2x" } } */
+/* { dg-final { scan-assembler "vspltw" } } */
+/* { dg-final { scan-assembler-not "xxpermdi" } } */
+
+#include <altivec.h>
+void abort();
+
+typedef struct xx {vector double l; vector double h;} xx;
+
+#define N 4096
+#define M 10000000
+vector float ca[N][4] = {0};
+vector float cb[N][4] = {0};
+vector float cc[N][4] = {0};
+
+__attribute__((noinline)) void foo ()
+{
+  int i;
+  vector float brow;
+
+  for (i = 0; i < N; i++) {
+
+    brow = cb[i][0];
+    cc[i][0] = vec_mul(vec_splats(brow[0]), ca[i][0]);
+    cc[i][0] = vec_madd(cc[i][0],vec_splats(brow[1]), ca[i][1]);
+    cc[i][0] = vec_madd(cc[i][0],vec_splats(brow[2]), ca[i][2]);
+    cc[i][0] = vec_madd(cc[i][0],vec_splats(brow[3]), ca[i][3]);
+
+    brow = cb[i][1];
+    cc[i][1] = vec_mul(vec_splats(brow[0]), ca[i][0]);
+    cc[i][1] = vec_madd(cc[i][0],vec_splats(brow[1]), ca[i][1]);
+    cc[i][1] = vec_madd(cc[i][0],vec_splats(brow[2]), ca[i][2]);
+    cc[i][1] = vec_madd(cc[i][0],vec_splats(brow[3]), ca[i][3]);
+    
+    brow = cb[i][2];
+    cc[i][2] = vec_mul(vec_splats(brow[0]), ca[i][0]);
+    cc[i][2] = vec_madd(cc[i][0],vec_splats(brow[1]), ca[i][1]);
+    cc[i][2] = vec_madd(cc[i][0],vec_splats(brow[2]), ca[i][2]);
+    cc[i][2] = vec_madd(cc[i][0],vec_splats(brow[3]), ca[i][3]);
+    
+    brow = cb[i][3];
+    cc[i][3] = vec_mul(vec_splats(brow[0]), ca[i][0]);
+    cc[i][3] = vec_madd(cc[i][0],vec_splats(brow[1]), ca[i][1]);
+    cc[i][3] = vec_madd(cc[i][0],vec_splats(brow[2]), ca[i][2]);
+    cc[i][3] = vec_madd(cc[i][0],vec_splats(brow[3]), ca[i][3]);
+  }
+}
+
+int main ()
+{
+  foo ();
+  return 0;
+}
author	Bill Schmidt <wschmidt@linux.vnet.ibm.com>
	Mon, 8 Sep 2014 18:40:56 +0000 (18:40 +0000)
committer	William Schmidt <wschmidt@gcc.gnu.org>
	Mon, 8 Sep 2014 18:40:56 +0000 (18:40 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config/rs6000/rs6000.c		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.target/powerpc/swaps-p8-16.c	[new file with mode: 0644]	patch \| blob