vec_merge + vec_duplicate + vec_concat simplification

author Kyrylo Tkachov <kyrylo.tkachov@arm.com>

Wed, 8 Nov 2017 18:27:57 +0000 (18:27 +0000)

committer Kyrylo Tkachov <ktkachov@gcc.gnu.org>

Wed, 8 Nov 2017 18:27:57 +0000 (18:27 +0000)
author Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Wed, 8 Nov 2017 18:27:57 +0000 (18:27 +0000)
committer Kyrylo Tkachov <ktkachov@gcc.gnu.org>
Wed, 8 Nov 2017 18:27:57 +0000 (18:27 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index d1225c3a94d88781f8aec592e18bbf7729fe139e..9b50bca81512f69e6823905e943c749c3d790453 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2017-11-08  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       * simplify-rtx.c (simplify_ternary_operation, VEC_MERGE):
+       Simplify vec_merge of vec_duplicate and vec_concat.
+       * config/aarch64/constraints.md (Utq): New constraint.
+       * config/aarch64/aarch64-simd.md (load_pair_lanes<mode>): New
+       define_insn.
+
  2017-11-08  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
  
         * simplify-rtx.c (simplify_ternary_operation, VEC_MERGE):
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md

index 34233f68cbae1fe75eb744ef0072aad7ca641883..1f5c911fc4b773a709571d2d2247695aefaa9c59 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2935,6 +2935,20 @@
    [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
  )
  
+(define_insn "load_pair_lanes<mode>"
+  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
+       (vec_concat:<VDBL>
+          (match_operand:VDC 1 "memory_operand" "Utq")
+          (match_operand:VDC 2 "memory_operand" "m")))]
+  "TARGET_SIMD && !STRICT_ALIGNMENT
+   && rtx_equal_p (XEXP (operands[2], 0),
+                  plus_constant (Pmode,
+                                 XEXP (operands[1], 0),
+                                 GET_MODE_SIZE (<MODE>mode)))"
+  "ldr\\t%q0, %1"
+  [(set_attr "type" "neon_load1_1reg_q")]
+)
+
  ;; In this insn, operand 1 should be low, and operand 2 the high part of the
  ;; dest vector.
  
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md

index 77ca85d929a36ca6ae407f933062e57a02b0b2f3..4ef7a50ac1ad11d902e25007b0ef8d4d683d62d4 100644 (file)
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -178,6 +178,13 @@
    (and (match_code "mem")
         (match_test "aarch64_simd_mem_operand_p (op)")))
  
+(define_memory_constraint "Utq"
+  "@internal
+   An address valid for loading or storing a 128-bit AdvSIMD register"
+  (and (match_code "mem")
+       (match_test "aarch64_legitimate_address_p (V2DImode, XEXP (op, 0),
+                                                 MEM, 1)")))
+
  (define_constraint "Ufc"
    "A floating point constant which can be used with an\
     FMOV immediate operation."
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c

index 94302f6d00962251eab7cdeb7018bfe5f139a876..92c783af5a6a32bcfad5daf5ccfcce7e33661007 100644 (file)
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -5765,6 +5765,25 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,
                 std::swap (newop0, newop1);
               return simplify_gen_binary (VEC_CONCAT, mode, newop0, newop1);
             }
+         /* Replace (vec_merge (vec_duplicate x) (vec_concat (y) (z)) (const_int N))
+            with (vec_concat x z) if N == 1, or (vec_concat y x) if N == 2.
+            Only applies for vectors of two elements.  */
+         if (GET_CODE (op0) == VEC_DUPLICATE
+             && GET_CODE (op1) == VEC_CONCAT
+             && GET_MODE_NUNITS (GET_MODE (op0)) == 2
+             && GET_MODE_NUNITS (GET_MODE (op1)) == 2
+             && IN_RANGE (sel, 1, 2))
+           {
+             rtx newop0 = XEXP (op0, 0);
+             rtx newop1 = XEXP (op1, 2 - sel);
+             rtx otherop = XEXP (op1, sel - 1);
+             if (sel == 2)
+               std::swap (newop0, newop1);
+             /* Don't want to throw away the other part of the vec_concat if
+                it has side-effects.  */
+             if (!side_effects_p (otherop))
+               return simplify_gen_binary (VEC_CONCAT, mode, newop0, newop1);
+           }
         }
  
        if (rtx_equal_p (op0, op1)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 28894ee07a1603b2a91f2be8155a8b7f0119cd0d..3d9c337bf404fbac514d363696d8395408c3e1a3 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2017-11-08  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       * gcc.target/aarch64/load_v2vec_lanes_1.c: New test.
+
  2017-11-08  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
  
         * gcc.target/aarch64/construct_lane_zero_1.c: New test.
diff --git a/gcc/testsuite/gcc.target/aarch64/load_v2vec_lanes_1.c b/gcc/testsuite/gcc.target/aarch64/load_v2vec_lanes_1.c

new file mode 100644 (file)

index 0000000..3c31b34
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/load_v2vec_lanes_1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef long long v2di __attribute__ ((vector_size (16)));
+typedef double v2df __attribute__ ((vector_size (16)));
+
+v2di
+construct_lanedi (long long *y)
+{
+  v2di x = { y[0], y[1] };
+  return x;
+}
+
+v2df
+construct_lanedf (double *y)
+{
+  v2df x = { y[0], y[1] };
+  return x;
+}
+
+/* We can use the load_pair_lanes<mode> pattern to vec_concat two DI/DF
+   values from consecutive memory into a 2-element vector by using
+   a Q-reg LDR.  */
+
+/* { dg-final { scan-assembler-times "ldr\tq\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler-not "ins\t" } } */
author	Kyrylo Tkachov <kyrylo.tkachov@arm.com>
	Wed, 8 Nov 2017 18:27:57 +0000 (18:27 +0000)
committer	Kyrylo Tkachov <ktkachov@gcc.gnu.org>
	Wed, 8 Nov 2017 18:27:57 +0000 (18:27 +0000)
gcc/ChangeLog		patch | blob | history
gcc/config/aarch64/aarch64-simd.md		patch | blob | history
gcc/config/aarch64/constraints.md		patch | blob | history
gcc/simplify-rtx.c		patch | blob | history
gcc/testsuite/ChangeLog		patch | blob | history
gcc/testsuite/gcc.target/aarch64/load_v2vec_lanes_1.c	[new file with mode: 0644]	patch | blob