Improve dup pattern
author: Wilco Dijkstra <wdijkstr@arm.com>
Wed, 21 Jun 2017 10:46:02 +0000 (10:46 +0000)
committer: Wilco Dijkstra <wilco@gcc.gnu.org>
Wed, 21 Jun 2017 10:46:02 +0000 (10:46 +0000)
Improve the dup pattern to prefer vector registers.  When doing a dup
after a load, the register allocator considers the costs of the two
alternatives identical and chooses an integer load.  However, a dup from
an integer register includes an int->fp transfer whose cost is not
modelled.  Adding a '?' to the integer alternative increases its cost
slightly, so we prefer using a vector register.  This improves the
following example:

#include <arm_neon.h>
void f(unsigned *a, uint32x4_t *b)
{
  b[0] = vdupq_n_u32(a[1]);
  b[1] = vdupq_n_u32(a[2]);
}

to:
        ldr     s0, [x0, 4]
        dup     v0.4s, v0.s[0]
        str     q0, [x1]
        ldr     s0, [x0, 8]
        dup     v0.4s, v0.s[0]
        str     q0, [x1, 16]
        ret

    gcc/
* config/aarch64/aarch64-simd.md (aarch64_simd_dup):
Swap alternatives, make integer dup more expensive.

From-SVN: r249443

gcc/ChangeLog
gcc/config/aarch64/aarch64-simd.md

index 1aaaf61a073ce2789629d4f0c991a168c06df793..1fc622bed97add726ff7a57002ebf28a75c5c3ea 100644 (file)
@@ -1,3 +1,8 @@
+2017-06-21  Wilco Dijkstra  <wdijkstr@arm.com>
+
+       * config/aarch64/aarch64-simd.md (aarch64_simd_dup):
+       Swap alternatives, make integer dup more expensive.
+
 2017-06-21  Wilco Dijkstra  <wdijkstr@arm.com>
 
        * config/aarch64/aarch64.c (aarch64_legitimate_constant_p):
index e811f3ef01ec190284de728ff4603cfaad2b7705..c94946563e620ada88df1cfa0f7724143a147df7 100644 (file)
 (define_insn "aarch64_simd_dup<mode>"
   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
        (vec_duplicate:VDQ_I
-         (match_operand:<VEL> 1 "register_operand" "r, w")))]
+         (match_operand:<VEL> 1 "register_operand" "w,?r")))]
   "TARGET_SIMD"
   "@
-   dup\\t%0.<Vtype>, %<vw>1
-   dup\\t%0.<Vtype>, %1.<Vetype>[0]"
-  [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
+   dup\\t%0.<Vtype>, %1.<Vetype>[0]
+   dup\\t%0.<Vtype>, %<vw>1"
+  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
 )
 
 (define_insn "aarch64_simd_dup<mode>"