PR target/83663 - Revert r255946

author Vidya Praveen <vidyapraveen@arm.com>

Mon, 8 Jan 2018 16:24:49 +0000 (16:24 +0000)

committer Vidya Praveen <vp@gcc.gnu.org>

Mon, 8 Jan 2018 16:24:49 +0000 (16:24 +0000)
author Vidya Praveen <vidyapraveen@arm.com>
Mon, 8 Jan 2018 16:24:49 +0000 (16:24 +0000)
committer Vidya Praveen <vp@gcc.gnu.org>
Mon, 8 Jan 2018 16:24:49 +0000 (16:24 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index e08bd9213fe49ad4b94c44569e95c5ac68861d19..27292bb449e8f1c606b023bfa311a01d177e7d18 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2018-01-08  Vidya Praveen  <vidyapraveen@arm.com>
+
+       PR target/83663 - Revert r255946
+       * config/aarch64/aarch64.c (aarch64_expand_vector_init): Modify code
+       generation for cases where splatting a value is not useful.
+       * simplify-rtx.c (simplify_ternary_operation): Simplify vec_merge
+       across a vec_duplicate and a paradoxical subreg forming a vector
+       mode to a vec_concat.
+
  2018-01-08  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
  
         * config/arm/t-aprofile (MULTILIB_MATCHES): Add mapping rules for
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

index a1896058c6d305af1b7cea8b7fb5d9a815b31cb5..03a92b6d695b056fa2ccb575b5bc49b23a7b0000 100644 (file)
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -12129,51 +12129,9 @@ aarch64_expand_vector_init (rtx target, rtx vals)
             maxv = matches[i][1];
           }
  
-      /* Create a duplicate of the most common element, unless all elements
-        are equally useless to us, in which case just immediately set the
-        vector register using the first element.  */
-
-      if (maxv == 1)
-       {
-         /* For vectors of two 64-bit elements, we can do even better.  */
-         if (n_elts == 2
-             && (inner_mode == E_DImode
-                 || inner_mode == E_DFmode))
-
-           {
-             rtx x0 = XVECEXP (vals, 0, 0);
-             rtx x1 = XVECEXP (vals, 0, 1);
-             /* Combine can pick up this case, but handling it directly
-                here leaves clearer RTL.
-
-                This is load_pair_lanes<mode>, and also gives us a clean-up
-                for store_pair_lanes<mode>.  */
-             if (memory_operand (x0, inner_mode)
-                 && memory_operand (x1, inner_mode)
-                 && !STRICT_ALIGNMENT
-                 && rtx_equal_p (XEXP (x1, 0),
-                                 plus_constant (Pmode,
-                                                XEXP (x0, 0),
-                                                GET_MODE_SIZE (inner_mode))))
-               {
-                 rtx t;
-                 if (inner_mode == DFmode)
-                   t = gen_load_pair_lanesdf (target, x0, x1);
-                 else
-                   t = gen_load_pair_lanesdi (target, x0, x1);
-                 emit_insn (t);
-                 return;
-               }
-           }
-         rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
-         aarch64_emit_move (target, lowpart_subreg (mode, x, inner_mode));
-         maxelement = 0;
-       }
-      else
-       {
-         rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
-         aarch64_emit_move (target, gen_vec_duplicate (mode, x));
-       }
+      /* Create a duplicate of the most common element.  */
+      rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
+      aarch64_emit_move (target, gen_vec_duplicate (mode, x));
  
        /* Insert the rest.  */
        for (int i = 0; i < n_elts; i++)
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c

index 6cb5a6e5d244f40013de114e0cf0ce4c37ecf12f..b052fbbe94952d59739961043c34c2fa1ee82b18 100644 (file)
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -5888,57 +5888,6 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,
                 return simplify_gen_binary (VEC_CONCAT, mode, newop0, newop1);
             }
  
-         /* Replace:
-
-             (vec_merge:outer (vec_duplicate:outer x:inner)
-                              (subreg:outer y:inner 0)
-                              (const_int N))
-
-            with (vec_concat:outer x:inner y:inner) if N == 1,
-            or (vec_concat:outer y:inner x:inner) if N == 2.
-            We assume that degenrate cases (N == 0 or N == 3), which
-            represent taking all elements from either input, are handled
-            elsewhere.
-
-            Implicitly, this means we have a paradoxical subreg, but such
-            a check is cheap, so make it anyway.
-
-            Only applies for vectors of two elements.  */
-
-         if ((GET_CODE (op0) == VEC_DUPLICATE
-              || GET_CODE (op1) == VEC_DUPLICATE)
-             && GET_MODE (op0) == GET_MODE (op1)
-             && known_eq (GET_MODE_NUNITS (GET_MODE (op0)), 2)
-             && known_eq (GET_MODE_NUNITS (GET_MODE (op1)), 2)
-             && IN_RANGE (sel, 1, 2))
-           {
-             rtx newop0 = op0, newop1 = op1;
-
-             /* Canonicalize locally such that the VEC_DUPLICATE is always
-                the first operand.  */
-             if (GET_CODE (newop1) == VEC_DUPLICATE)
-               {
-                 std::swap (newop0, newop1);
-                 /* If we swap the operand order, we also need to swap
-                    the selector mask.  */
-                 sel = sel == 1 ? 2 : 1;
-               }
-
-             if (GET_CODE (newop1) == SUBREG
-                 && paradoxical_subreg_p (newop1)
-                 && subreg_lowpart_p (newop1)
-                 && GET_MODE (SUBREG_REG (newop1))
-                     == GET_MODE (XEXP (newop0, 0)))
-               {
-                 newop0 = XEXP (newop0, 0);
-                 newop1 = SUBREG_REG (newop1);
-                 if (sel == 2)
-                   std::swap (newop0, newop1);
-                 return simplify_gen_binary (VEC_CONCAT, mode,
-                                             newop0, newop1);
-               }
-           }
-
           /* Replace (vec_merge (vec_duplicate x) (vec_duplicate y)
                                  (const_int n))
              with (vec_concat x y) or (vec_concat y x) depending on value
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 749668f981637709d1fa1993ab3f2cdd35f19915..5c1387306932b0823ae538bc287d09d3b1c81d88 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2018-01-08  Vidya Praveen  <vidyapraveen@arm.com>
+
+       PR target/83663 - Revert r255946
+       * gcc.target/aarch64/vect-slp-dup.c: Remove.
+
  2018-01-08  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
  
         * gcc.target/arm/multilib.exp: Add fp16, dotprod and armv8.3-a
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-slp-dup.c b/gcc/testsuite/gcc.target/aarch64/vect-slp-dup.c

deleted file mode 100644 (file)

index 0541e48..0000000
--- a/gcc/testsuite/gcc.target/aarch64/vect-slp-dup.c
+++ /dev/null
@@ -1,20 +0,0 @@
-/* { dg-do compile } */
-
-/* { dg-options "-O3 -ftree-vectorize -fno-vect-cost-model" } */
-
-void bar (double);
-
-void
-foo (double *restrict in, double *restrict in2,
-     double *restrict out1, double *restrict out2)
-{
-  for (int i = 0; i < 1024; i++)
-    {
-      out1[i] = in[i] + 2.0 * in[i+128];
-      out1[i+1] = in[i+1] + 2.0 * in2[i];
-      bar (in[i]);
-    }
-}
-
-/* { dg-final { scan-assembler-not "dup\tv\[0-9\]+.2d, v\[0-9\]+" } } */
-
author	Vidya Praveen <vidyapraveen@arm.com>
	Mon, 8 Jan 2018 16:24:49 +0000 (16:24 +0000)
committer	Vidya Praveen <vp@gcc.gnu.org>
	Mon, 8 Jan 2018 16:24:49 +0000 (16:24 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config/aarch64/aarch64.c		patch \| blob \| history
gcc/simplify-rtx.c		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/vect-slp-dup.c	[deleted file]	patch \| blob \| history