From: Kyrylo Tkachov Date: Wed, 8 Nov 2017 18:32:09 +0000 (+0000) Subject: [AArch64] Add STP pattern to store a vec_concat of two 64-bit registers X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=7692ce17a3383c956400a55af88d4ff500dd614a;p=gcc.git [AArch64] Add STP pattern to store a vec_concat of two 64-bit registers On top of the previous vec_merge simplifications [1] we can add this pattern to perform a store of a vec_concat of two 64-bit values in distinct registers as an STP. This avoids constructing such a vector explicitly in a register and storing it as a Q register. This way for the code in the testcase we can generate: construct_lane_1: ldp d1, d0, [x0] fmov d3, 1.0e+0 fmov d2, 2.0e+0 fadd d4, d1, d3 fadd d5, d0, d2 stp d4, d5, [x1, 32] ret construct_lane_2: ldp x2, x0, [x0] add x3, x2, 1 add x4, x0, 2 stp x3, x4, [x1, 32] ret instead of the current: construct_lane_1: ldp d0, d1, [x0] fmov d3, 1.0e+0 fmov d2, 2.0e+0 fadd d0, d0, d3 fadd d1, d1, d2 dup v0.2d, v0.d[0] ins v0.d[1], v1.d[0] str q0, [x1, 32] ret construct_lane_2: ldp x2, x3, [x0] add x0, x2, 1 add x2, x3, 2 dup v0.2d, x0 ins v0.d[1], x2 str q0, [x1, 32] ret Bootstrapped and tested on aarch64-none-linux-gnu. [1] https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00272.html https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00273.html https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00274.html * config/aarch64/aarch64-simd.md (store_pair_lanes): New pattern. * config/aarch64/constraints.md (Uml): New constraint. * config/aarch64/predicates.md (aarch64_mem_pair_lanes_operand): New predicate. * gcc.target/aarch64/store_v2vec_lanes.c: New test. From-SVN: r254551 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 37ccb2cbd0e..805625f8f3d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2017-11-08 Kyrylo Tkachov + + * config/aarch64/aarch64-simd.md (store_pair_lanes): + New pattern. + * config/aarch64/constraints.md (Uml): New constraint. 
+ * config/aarch64/predicates.md (aarch64_mem_pair_lanes_operand): New + predicate. + 2017-11-08 Kyrylo Tkachov * simplify-rtx.c (simplify_ternary_operation): Simplify vec_merge diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 1f5c911fc4b..9a6da35f6f9 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -2949,6 +2949,18 @@ [(set_attr "type" "neon_load1_1reg_q")] ) +(define_insn "store_pair_lanes<mode>" + [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml") + (vec_concat:<VDBL> + (match_operand:VDC 1 "register_operand" "w, r") + (match_operand:VDC 2 "register_operand" "w, r")))] + "TARGET_SIMD" + "@ + stp\\t%d1, %d2, %0 + stp\\t%x1, %x2, %0" + [(set_attr "type" "neon_stp, store_16")] +) + ;; In this insn, operand 1 should be low, and operand 2 the high part of the ;; dest vector. diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md index 4ef7a50ac1a..af4143ef756 100644 --- a/gcc/config/aarch64/constraints.md +++ b/gcc/config/aarch64/constraints.md @@ -171,6 +171,15 @@ (match_test "aarch64_legitimate_address_p (GET_MODE (op), XEXP (op, 0), PARALLEL, 1)"))) +;; Used for storing two 64-bit values in an AdvSIMD register using an STP +;; as a 128-bit vec_concat. +(define_memory_constraint "Uml" + "@internal + A memory address suitable for a load/store pair operation." 
+ (and (match_code "mem") + (match_test "aarch64_legitimate_address_p (DFmode, XEXP (op, 0), + PARALLEL, 1)"))) + (define_memory_constraint "Utv" "@internal An address valid for loading/storing opaque structure diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 84d441a1cf6..2eaf0a76301 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -189,6 +189,13 @@ (match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), PARALLEL, 0)"))) +;; Used for storing two 64-bit values in an AdvSIMD register using an STP +;; as a 128-bit vec_concat. +(define_predicate "aarch64_mem_pair_lanes_operand" + (and (match_code "mem") + (match_test "aarch64_legitimate_address_p (DFmode, XEXP (op, 0), + PARALLEL, 1)"))) + (define_predicate "aarch64_prefetch_operand" (match_test "aarch64_address_valid_for_prefetch_p (op, false)")) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3d9c337bf40..d20cadc2323 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2017-11-08 Kyrylo Tkachov + + * gcc.target/aarch64/store_v2vec_lanes.c: New test. + 2017-11-08 Kyrylo Tkachov * gcc.target/aarch64/load_v2vec_lanes_1.c: New test. 
diff --git a/gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c b/gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c new file mode 100644 index 00000000000..6810db3c54d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/store_v2vec_lanes.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +typedef long long v2di __attribute__ ((vector_size (16))); +typedef double v2df __attribute__ ((vector_size (16))); + +void +construct_lane_1 (double *y, v2df *z) +{ + double y0 = y[0] + 1; + double y1 = y[1] + 2; + v2df x = {y0, y1}; + z[2] = x; +} + +void +construct_lane_2 (long long *y, v2di *z) +{ + long long y0 = y[0] + 1; + long long y1 = y[1] + 2; + v2di x = {y0, y1}; + z[2] = x; +} + +/* We can use the store_pair_lanes pattern to store a vec_concat of two + DI/DF values from distinct registers to memory with a single STP, + without first building the vector in a Q register. */ + +/* { dg-final { scan-assembler-times "stp\td\[0-9\]+, d\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "stp\tx\[0-9\]+, x\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-not "ins\t" } } */