From: Michael Meissner <meissner@linux.vnet.ibm.com>
Date: Thu, 16 Mar 2017 20:09:21 +0000 (+0000)
Subject: re PR target/71294 (ICE in gen_add2_insn, at optabs.c:4442 on powerpc64le-linux)
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=207f34233c1d3fff9e07ecf2bd7dc30d86822e66;p=gcc.git

re PR target/71294 (ICE in gen_add2_insn, at optabs.c:4442 on powerpc64le-linux)

[gcc]
2017-03-16  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/71294
	* config/rs6000/vsx.md (vsx_splat_<mode>, VSX_D iterator): Allow a
	SPLAT operation on ISA 2.07 64-bit systems that have direct move,
	but no MTVSRDD support, by doing MTVSRD and XXPERMDI.

[gcc/testsuite]
2017-03-16  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/71294
	* g++.dg/pr71294.C: New test.

From-SVN: r246209
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 96e1592daae..2fd226d056f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2017-03-16  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	PR target/71294
+	* config/rs6000/vsx.md (vsx_splat_<mode>, VSX_D iterator): Allow a
+	SPLAT operation on ISA 2.07 64-bit systems that have direct move,
+	but no MTVSRDD support, by doing MTVSRD and XXPERMDI.
+
 2017-03-16  Jeff Law  <law@redhat.com>
 
 	PR tree-optimization/71437
@@ -23,14 +30,14 @@
 	(vrp_finalize): Do not call identify_jump_threads here.
 	(execute_vrp): Do it here instead and call thread_through_all_blocks
 	here too.
-	
+
 	PR tree-optimization/71437
 	* tree-ssa-dom.c (pfn_simplify): Add basic_block argument.  All
 	callers changed.
 	(simplify_stmt_for_jump_threading): Add basic_block argument.  All
 	callers changed.
 	(lhs_of_dominating_assert): Moved from here into tree-vrp.c.
-	(dom_opt_dom_walker::thread_across_edge): Remove 
+	(dom_opt_dom_walker::thread_across_edge): Remove
 	handle_dominating_asserts argument.  All callers changed.
 	(record_temporary_equivalences_from_stmts_at_dest): Corresponding
 	changes.  Remove calls to lhs_of_dominating_assert.  Other
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 01b55e616b7..f4f1663b36d 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3067,16 +3067,29 @@
 })
 
 ;; V2DF/V2DI splat
-(define_insn "vsx_splat_<mode>"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>,<VSa>,we")
+(define_insn_and_split "vsx_splat_<mode>"
+  [(set (match_operand:VSX_D 0 "vsx_register_operand"
+					"=<VSa>,    <VSa>,we,<VS_64dm>")
 	(vec_duplicate:VSX_D
-	 (match_operand:<VS_scalar> 1 "splat_input_operand" "<VS_64reg>,Z,b")))]
+	 (match_operand:<VS_scalar> 1 "splat_input_operand"
+					"<VS_64reg>,Z,    b, wr")))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
   "@
    xxpermdi %x0,%x1,%x1,0
    lxvdsx %x0,%y1
-   mtvsrdd %x0,%1,%1"
-  [(set_attr "type" "vecperm,vecload,vecperm")])
+   mtvsrdd %x0,%1,%1
+   #"
+  "&& reload_completed && TARGET_POWERPC64 && !TARGET_P9_VECTOR
+   && int_reg_operand (operands[1], <VS_scalar>mode)"
+  [(set (match_dup 2)
+	(match_dup 1))
+   (set (match_dup 0)
+	(vec_duplicate:VSX_D (match_dup 2)))]
+{
+  operands[2] = gen_rtx_REG (<VS_scalar>mode, reg_or_subregno (operands[0]));
+}
+  [(set_attr "type" "vecperm,vecload,vecperm,vecperm")
+   (set_attr "length" "4,4,4,8")])
 
 ;; V4SI splat support
 (define_insn "vsx_splat_v4si"
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 1a6d154083b..72c74df18d2 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2017-03-16  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	PR target/71294
+	* g++.dg/pr71294.C: New test.
+
 2017-03-16  Segher Boessenkool  <segher@kernel.crashing.org>
 
 	* gcc.dg/tree-prof/pr66295.c: Skip unless on an x86 target.
diff --git a/gcc/testsuite/g++.dg/pr71294.C b/gcc/testsuite/g++.dg/pr71294.C
new file mode 100644
index 00000000000..55dd01e5ccf
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr71294.C
@@ -0,0 +1,60 @@
+// { dg-do compile { target { powerpc64*-*-* && lp64 } } }
+// { dg-require-effective-target powerpc_p8vector_ok }
+// { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } }
+// { dg-options "-mcpu=power8 -O3 -fstack-protector -mno-lra" }
+
+// PR target/71294 failed because RELOAD could not figure out how to create a
+// V2DI vector that auto vectorization created with each element being the same
+// stack address, with stack-protector turned on.
+
+class A;
+template <typename _Tp, int m, int n> class B {
+public:
+  _Tp val[m * n];
+};
+class C {
+public:
+  C(A);
+};
+struct D {
+  D();
+  unsigned long &operator[](int);
+  unsigned long *p;
+};
+class A {
+public:
+  template <typename _Tp, int m, int n> A(const B<_Tp, m, n> &, bool);
+  int rows, cols;
+  unsigned char *data;
+  unsigned char *datastart;
+  unsigned char *dataend;
+  unsigned char *datalimit;
+  D step;
+};
+template <typename _Tp, int m, int n>
+A::A(const B<_Tp, m, n> &p1, bool)
+    : rows(m), cols(n) {
+  step[0] = cols * sizeof(_Tp);
+  datastart = data = (unsigned char *)p1.val;
+  datalimit = dataend = datastart + rows * step[0];
+}
+class F {
+public:
+  static void compute(C);
+  template <typename _Tp, int m, int n, int nm>
+  static void compute(const B<_Tp, m, n> &, B<_Tp, nm, 1> &, B<_Tp, m, nm> &,
+                      B<_Tp, n, nm> &);
+};
+D::D() {}
+unsigned long &D::operator[](int p1) { return p[p1]; }
+template <typename _Tp, int m, int n, int nm>
+void F::compute(const B<_Tp, m, n> &, B<_Tp, nm, 1> &, B<_Tp, m, nm> &,
+                B<_Tp, n, nm> &p4) {
+  A a(p4, false);
+  compute(a);
+}
+void fn1() {
+  B<double, 4, 4> b, c, e;
+  B<double, 4, 1> d;
+  F::compute(b, d, c, e);
+}