From dca19fe10daf00bfef714e3f9f98c63ff70c7a64 Mon Sep 17 00:00:00 2001 From: Wilco Dijkstra Date: Wed, 21 Jun 2017 10:46:02 +0000 Subject: [PATCH] Improve dup pattern Improve the dup pattern to prefer vector registers. When doing a dup after a load, the register allocator thinks the costs are identical and chooses an integer load. However a dup from an integer register includes an int->fp transfer which is not modelled. Adding a '?' to the integer variant means the cost is increased slightly so we prefer using a vector register. This improves the following example: #include <arm_neon.h> void f(unsigned *a, uint32x4_t *b) { b[0] = vdupq_n_u32(a[1]); b[1] = vdupq_n_u32(a[2]); } to: ldr s0, [x0, 4] dup v0.4s, v0.s[0] str q0, [x1] ldr s0, [x0, 8] dup v0.4s, v0.s[0] str q0, [x1, 16] ret gcc/ * config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>): Swap alternatives, make integer dup more expensive. From-SVN: r249443 --- gcc/ChangeLog | 5 +++++ gcc/config/aarch64/aarch64-simd.md | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1aaaf61a073..1fc622bed97 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2017-06-21 Wilco Dijkstra + + * config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>): + Swap alternatives, make integer dup more expensive.
+ 2017-06-21 Wilco Dijkstra * config/aarch64/aarch64.c (aarch64_legitimate_constant_p): diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index e811f3ef01e..c94946563e6 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -44,12 +44,12 @@ (define_insn "aarch64_simd_dup<mode>" [(set (match_operand:VDQ_I 0 "register_operand" "=w, w") (vec_duplicate:VDQ_I - (match_operand:<VEL> 1 "register_operand" "r, w")))] + (match_operand:<VEL> 1 "register_operand" "w,?r")))] "TARGET_SIMD" "@ - dup\\t%0.<Vtype>, %<vw>1 - dup\\t%0.<Vtype>, %1.<Vetype>[0]" - [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")] + dup\\t%0.<Vtype>, %1.<Vetype>[0] + dup\\t%0.<Vtype>, %<vw>1" + [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")] ) (define_insn "aarch64_simd_dup<mode>" -- 2.30.2