rs6000.c (rs6000_emit_le_vsx_move): Verify that this is never called when lxvx/stxvx are available.
author: Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Mon, 28 Dec 2015 16:37:03 +0000 (16:37 +0000)
committer: William Schmidt <wschmidt@gcc.gnu.org>
Mon, 28 Dec 2015 16:37:03 +0000 (16:37 +0000)
[gcc]

2015-12-28  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

* config/rs6000/rs6000.c (rs6000_emit_le_vsx_move): Verify that
this is never called when lxvx/stxvx are available.
(pass_analyze_swaps::gate): Don't perform swap optimization when
lxvx/stxvx are available.
* config/rs6000/vector.md (mov<mode>): Don't call
rs6000_emit_le_vsx_move when lxvx/stxvx are available.
* config/rs6000/vsx.md (*p9_vecload_<mode>): New define_insn.
(*p9_vecstore_<mode>): Likewise.
(*vsx_le_perm_load_<mode>:VSX_LE): Disable when lxvx/stxvx are
available.
(*vsx_le_perm_load_<mode>:VSX_W): Likewise.
(*vsx_le_perm_load_v8hi): Likewise.
(*vsx_le_perm_load_v16qi): Likewise.
(*vsx_le_perm_store_<mode>:VSX_LE): Likewise.
([related define_splits]): Likewise.
(*vsx_le_perm_store_<mode>:VSX_W): Likewise.
([related define_splits]): Likewise.
(*vsx_le_perm_store_v8hi): Likewise.
([related define_splits]): Likewise.
(*vsx_le_perm_store_v16qi): Likewise.
([related define_splits]): Likewise.
(*vsx_lxvd2x2_le_<mode>): Likewise.
(*vsx_lxvd2x4_le_<mode>): Likewise.
(*vsx_lxvd2x8_le_V8HI): Likewise.
(*vsx_lxvd2x16_le_V16QI): Likewise.
(*vsx_stxvd2x2_le_<mode>): Likewise.
(*vsx_stxvd2x4_le_<mode>): Likewise.
(*vsx_stxvd2x8_le_V8HI): Likewise.
(*vsx_stxvd2x16_le_V16QI): Likewise.
([define_peepholes for vector load fusion]): Likewise.

[gcc/testsuite]

2015-12-28  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

* gcc.target/powerpc/p9-lxvx-stxvx-1.c: New.
* gcc.target/powerpc/p9-lxvx-stxvx-2.c: New.

From-SVN: r231974

gcc/ChangeLog
gcc/config/rs6000/rs6000.c
gcc/config/rs6000/vector.md
gcc/config/rs6000/vsx.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-2.c [new file with mode: 0644]

index 3a5010080d0670cd655d7ad2013df10839d026cd..5f7e0009d84c58ca8126e9d99176a107c2bcad93 100644 (file)
@@ -1,3 +1,36 @@
+2015-12-28  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
+
+       * config/rs6000/rs6000.c (rs6000_emit_le_vsx_move): Verify that
+       this is never called when lxvx/stxvx are available.
+       (pass_analyze_swaps::gate): Don't perform swap optimization when
+       lxvx/stxvx are available.
+       * config/rs6000/vector.md (mov<mode>): Don't call
+       rs6000_emit_le_vsx_move when lxvx/stxvx are available.
+       * config/rs6000/vsx.md (*p9_vecload_<mode>): New define_insn.
+       (*p9_vecstore_<mode>): Likewise.
+       (*vsx_le_perm_load_<mode>:VSX_LE): Disable when lxvx/stxvx are
+       available.
+       (*vsx_le_perm_load_<mode>:VSX_W): Likewise.
+       (*vsx_le_perm_load_v8hi): Likewise.
+       (*vsx_le_perm_load_v16qi): Likewise.
+       (*vsx_le_perm_store_<mode>:VSX_LE): Likewise.
+       ([related define_splits]): Likewise.
+       (*vsx_le_perm_store_<mode>:VSX_W): Likewise.
+       ([related define_splits]): Likewise.
+       (*vsx_le_perm_store_v8hi): Likewise.
+       ([related define_splits]): Likewise.
+       (*vsx_le_perm_store_v16qi): Likewise.
+       ([related define_splits]): Likewise.
+       (*vsx_lxvd2x2_le_<mode>): Likewise.
+       (*vsx_lxvd2x4_le_<mode>): Likewise.
+       (*vsx_lxvd2x8_le_V8HI): Likewise.
+       (*vsx_lxvd2x16_le_V16QI): Likewise.
+       (*vsx_stxvd2x2_le_<mode>): Likewise.
+       (*vsx_stxvd2x4_le_<mode>): Likewise.
+       (*vsx_stxvd2x8_le_V8HI): Likewise.
+       (*vsx_stxvd2x16_le_V16QI): Likewise.
+       ([define_peepholes for vector load fusion]): Likewise.
+
 2015-12-28  Nathan Sidwell  <nathan@acm.org>
 
        * config/nvptx/nvptx.c (nvptx_output_call_insn): Expect hard regs.
index 8ffdae2e0241ac3c85bf5f9e54f55a09a8108714..a97e47a72ba8addc4fb65bbd7b02a4cf62e7c6ae 100644 (file)
@@ -8904,6 +8904,7 @@ rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
 {
   gcc_assert (!BYTES_BIG_ENDIAN
              && VECTOR_MEM_VSX_P (mode)
+             && !TARGET_P9_VECTOR
              && !gpr_or_gpr_p (dest, source)
              && (MEM_P (source) ^ MEM_P (dest)));
 
@@ -37793,7 +37794,7 @@ public:
   virtual bool gate (function *)
     {
       return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
-             && rs6000_optimize_swaps);
+             && !TARGET_P9_VECTOR && rs6000_optimize_swaps);
     }
 
   virtual unsigned int execute (function *fun)
index d8699c8a0328baed1ee3a99d087a596c1c44fa68..1759390d25a4be2d5be9700ca5d45ab543e704fc 100644 (file)
     }
   if (!BYTES_BIG_ENDIAN
       && VECTOR_MEM_VSX_P (<MODE>mode)
+      && !TARGET_P9_VECTOR
       && !gpr_or_gpr_p (operands[0], operands[1])
       && (memory_operand (operands[0], <MODE>mode)
           ^ memory_operand (operands[1], <MODE>mode)))
index b95da6f1d0cad3e4f3647b80d89f133dbddb9580..cf5ea568ef6df030e6a2d6cc37641110d3f3c9c8 100644 (file)
    UNSPEC_VSX_XVCVDPUXDS
   ])
 
+;; VSX (P9) moves
+
+(define_insn "*p9_vecload_<mode>"  ; Load a VSX_M-mode vector from memory into a VSX register.
+  [(set (match_operand:VSX_M 0 "vsx_register_operand" "=<VSa>")
+        (match_operand:VSX_M 1 "memory_operand" "Z"))]
+  "TARGET_P9_VECTOR"  ; Pattern is enabled only when TARGET_P9_VECTOR is set.
+  "lxvx %x0,%y1"  ; Emitted as a single lxvx (length 4 below).
+  [(set_attr "type" "vecload")
+   (set_attr "length" "4")])
+
+(define_insn "*p9_vecstore_<mode>"  ; Store a VSX_M-mode vector from a VSX register to memory.
+  [(set (match_operand:VSX_M 0 "memory_operand" "=Z")
+        (match_operand:VSX_M 1 "vsx_register_operand" "<VSa>"))]
+  "TARGET_P9_VECTOR"  ; Pattern is enabled only when TARGET_P9_VECTOR is set.
+  "stxvx %x1,%y0"  ; Emitted as a single stxvx (length 4 below).
+  [(set_attr "type" "vecstore")
+   (set_attr "length" "4")])
+
 ;; VSX moves
 
 ;; The patterns for LE permuted loads and stores come before the general
 (define_insn_and_split "*vsx_le_perm_load_<mode>"
   [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
         (match_operand:VSX_LE 1 "memory_operand" "Z"))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   [(set (match_dup 2)
         (vec_select:<MODE>
           (match_dup 1)
 (define_insn_and_split "*vsx_le_perm_load_<mode>"
   [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
         (match_operand:VSX_W 1 "memory_operand" "Z"))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   [(set (match_dup 2)
         (vec_select:<MODE>
           (match_dup 1)
 (define_insn_and_split "*vsx_le_perm_load_v8hi"
   [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
         (match_operand:V8HI 1 "memory_operand" "Z"))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   [(set (match_dup 2)
         (vec_select:V8HI
           (match_dup 1)
 (define_insn_and_split "*vsx_le_perm_load_v16qi"
   [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
         (match_operand:V16QI 1 "memory_operand" "Z"))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   [(set (match_dup 2)
         (vec_select:V16QI
           (match_dup 1)
 (define_insn "*vsx_le_perm_store_<mode>"
   [(set (match_operand:VSX_LE 0 "memory_operand" "=Z")
         (match_operand:VSX_LE 1 "vsx_register_operand" "+<VSa>"))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
   [(set_attr "type" "vecstore")
    (set_attr "length" "12")])
 (define_split
   [(set (match_operand:VSX_LE 0 "memory_operand" "")
         (match_operand:VSX_LE 1 "vsx_register_operand" ""))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
   [(set (match_dup 2)
         (vec_select:<MODE>
           (match_dup 1)
 (define_split
   [(set (match_operand:VSX_LE 0 "memory_operand" "")
         (match_operand:VSX_LE 1 "vsx_register_operand" ""))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
   [(set (match_dup 1)
         (vec_select:<MODE>
           (match_dup 1)
 (define_insn "*vsx_le_perm_store_<mode>"
   [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
         (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
   [(set_attr "type" "vecstore")
    (set_attr "length" "12")])
 (define_split
   [(set (match_operand:VSX_W 0 "memory_operand" "")
         (match_operand:VSX_W 1 "vsx_register_operand" ""))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
   [(set (match_dup 2)
         (vec_select:<MODE>
           (match_dup 1)
 (define_split
   [(set (match_operand:VSX_W 0 "memory_operand" "")
         (match_operand:VSX_W 1 "vsx_register_operand" ""))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
   [(set (match_dup 1)
         (vec_select:<MODE>
           (match_dup 1)
 (define_insn "*vsx_le_perm_store_v8hi"
   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
         (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
   [(set_attr "type" "vecstore")
    (set_attr "length" "12")])
 (define_split
   [(set (match_operand:V8HI 0 "memory_operand" "")
         (match_operand:V8HI 1 "vsx_register_operand" ""))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
   [(set (match_dup 2)
         (vec_select:V8HI
           (match_dup 1)
 (define_split
   [(set (match_operand:V8HI 0 "memory_operand" "")
         (match_operand:V8HI 1 "vsx_register_operand" ""))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
   [(set (match_dup 1)
         (vec_select:V8HI
           (match_dup 1)
 (define_insn "*vsx_le_perm_store_v16qi"
   [(set (match_operand:V16QI 0 "memory_operand" "=Z")
         (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
   [(set_attr "type" "vecstore")
    (set_attr "length" "12")])
 (define_split
   [(set (match_operand:V16QI 0 "memory_operand" "")
         (match_operand:V16QI 1 "vsx_register_operand" ""))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
   [(set (match_dup 2)
         (vec_select:V16QI
           (match_dup 1)
 (define_split
   [(set (match_operand:V16QI 0 "memory_operand" "")
         (match_operand:V16QI 1 "vsx_register_operand" ""))]
-  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
+  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
   [(set (match_dup 1)
         (vec_select:V16QI
           (match_dup 1)
         (vec_select:VSX_LE
           (match_operand:VSX_LE 1 "memory_operand" "Z")
           (parallel [(const_int 1) (const_int 0)])))]
-  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
+  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
   "lxvd2x %x0,%y1"
   [(set_attr "type" "vecload")])
 
           (match_operand:VSX_W 1 "memory_operand" "Z")
           (parallel [(const_int 2) (const_int 3)
                      (const_int 0) (const_int 1)])))]
-  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
+  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
   "lxvd2x %x0,%y1"
   [(set_attr "type" "vecload")])
 
                      (const_int 6) (const_int 7)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)])))]
-  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
+  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
   "lxvd2x %x0,%y1"
   [(set_attr "type" "vecload")])
 
                      (const_int 2) (const_int 3)
                      (const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)])))]
-  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
+  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
   "lxvd2x %x0,%y1"
   [(set_attr "type" "vecload")])
 
         (vec_select:VSX_LE
           (match_operand:VSX_LE 1 "vsx_register_operand" "<VSa>")
           (parallel [(const_int 1) (const_int 0)])))]
-  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
+  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
   "stxvd2x %x1,%y0"
   [(set_attr "type" "vecstore")])
 
           (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
           (parallel [(const_int 2) (const_int 3)
                      (const_int 0) (const_int 1)])))]
-  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
+  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
   "stxvd2x %x1,%y0"
   [(set_attr "type" "vecstore")])
 
                      (const_int 6) (const_int 7)
                      (const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)])))]
-  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
+  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
   "stxvd2x %x1,%y0"
   [(set_attr "type" "vecstore")])
 
                      (const_int 2) (const_int 3)
                      (const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)])))]
-  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
+  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
   "stxvd2x %x1,%y0"
   [(set_attr "type" "vecstore")])
 
    (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
        (mem:VSX_M2 (plus:P (match_dup 0)
                            (match_operand:P 3 "int_reg_operand" ""))))]
-  "TARGET_VSX && TARGET_P8_FUSION"
+  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
   "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"  
   [(set_attr "length" "8")
    (set_attr "type" "vecload")])
    (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
        (mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "")
                            (match_dup 0))))]
-  "TARGET_VSX && TARGET_P8_FUSION"
+  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
   "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"  
   [(set_attr "length" "8")
    (set_attr "type" "vecload")])
index 1164bcd3958fb5269d47cb62900996bdca8e116e..949eeea5b29a2250c8254bc0c7df76587249bcca 100644 (file)
@@ -1,3 +1,8 @@
+2015-12-28  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
+
+       * gcc.target/powerpc/p9-lxvx-stxvx-1.c: New.
+       * gcc.target/powerpc/p9-lxvx-stxvx-2.c: New.
+
 2015-12-24  Kirill Yukhin  <kirill.yukhin@intel.com>
 
        * g++.dg/other/i386-2.C: Add -mpku.
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-1.c b/gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-1.c
new file mode 100644 (file)
index 0000000..df25d55
--- /dev/null
@@ -0,0 +1,26 @@
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O3" } */
+/* { dg-final { scan-assembler "lxvx" } } */
+/* { dg-final { scan-assembler "stxvx" } } */
+/* { dg-final { scan-assembler-not "lxvd2x" } } */
+/* { dg-final { scan-assembler-not "stxvd2x" } } */
+/* { dg-final { scan-assembler-not "xxpermdi" } } */
+
+/* Verify P9 vector loads and stores are used rather than the
+   load-swap/swap-store workarounds for P8.  */
+#define N 16
+
+signed char ca[N] __attribute__((aligned(16)));
+signed char cb[] __attribute__((aligned(16)))
+  = {8, 7, 6, 5, 4, 3, 2,  1,  0, -1, -2, -3, -4, -5, -6, -7};
+signed char cc[] __attribute__((aligned(16)))
+  = {1, 1, 2, 2, 3, 3, 2,  2,  1,  1,  0,  0, -1, -1, -2, -2};
+
+__attribute__((noinline)) void foo ()  /* Compute ca[i] = cb[i] - cc[i] for every index below N.  */
+{
+  int i;
+  for (i = 0; i < N; i++) {  /* N is 16; presumably vectorized at -O3 into the lxvx/stxvx that dg-final scans for.  */
+    ca[i] = cb[i] - cc[i];
+  }
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-2.c b/gcc/testsuite/gcc.target/powerpc/p9-lxvx-stxvx-2.c
new file mode 100644 (file)
index 0000000..853a456
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O1" } */
+/* { dg-final { scan-assembler "lxvx" } } */
+/* { dg-final { scan-assembler "stvewx" } } */
+/* { dg-final { scan-assembler-not "lxvd2x" } } */
+
+/* Verify we don't perform P8 load-vector fusion on P9.  */
+#include <altivec.h>
+
+void f (void *p)  /* Load a vector through P with vec_vsx_ld, then store from it back through P with vec_ste.  */
+{
+  vector unsigned int u32 = vec_vsx_ld (1, (const unsigned int *)p);  /* Presumably the source of the lxvx (and absence of lxvd2x) that dg-final checks.  */
+  vec_ste (u32, 1, (unsigned int *)p);  /* Presumably the source of the stvewx that dg-final checks.  */
+}