From d83950138bd18b0c79f336513005cacfc0dcfdc7 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Mon, 5 Jun 2017 08:52:02 +0000 Subject: [PATCH] [AArch64] Add combine pattern for storing lane zero of a vector * config/aarch64/aarch64-simd.md (aarch64_store_lane0<mode>): New pattern. * gcc.target/aarch64/store_lane0_str_1.c: New test. From-SVN: r248871 --- gcc/ChangeLog | 5 ++ gcc/config/aarch64/aarch64-simd.md | 13 +++++ gcc/testsuite/ChangeLog | 4 ++ .../gcc.target/aarch64/store_lane0_str_1.c | 54 +++++++++++++++++++ 4 files changed, 76 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/store_lane0_str_1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1921a6fd522..06bfd2904e6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2017-06-05 Kyrylo Tkachov + + * config/aarch64/aarch64-simd.md (aarch64_store_lane0<mode>): + New pattern. + 2017-06-05 Kyrylo Tkachov * config/aarch64/aarch64.md (sub<mode>3_compare1_imm): New define_insn. diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 693b476788e..c5a86ff6f71 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -153,6 +153,19 @@ (set_attr "length" "4,4,4,8,8,8,4")] ) +;; When storing lane zero we can use the normal STR and its more permissive +;; addressing modes. 
+ +(define_insn "aarch64_store_lane0<mode>" + [(set (match_operand:<VEL> 0 "memory_operand" "=m") + (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w") + (parallel [(match_operand 2 "const_int_operand" "n")])))] + "TARGET_SIMD + && ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])) == 0" + "str\\t%<Vetype>1, %0" + [(set_attr "type" "neon_store1_1reg")] +) + (define_insn "load_pair<mode>" [(set (match_operand:VD 0 "register_operand" "=w") (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump")) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 5fa7850015f..85de68e52d6 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2017-06-05 Kyrylo Tkachov + + * gcc.target/aarch64/store_lane0_str_1.c: New test. + 2017-06-05 Kyrylo Tkachov * gcc.target/aarch64/subs_compare_2.c: New test. diff --git a/gcc/testsuite/gcc.target/aarch64/store_lane0_str_1.c b/gcc/testsuite/gcc.target/aarch64/store_lane0_str_1.c new file mode 100644 index 00000000000..4464fec2c1f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/store_lane0_str_1.c @@ -0,0 +1,54 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +typedef int v2si __attribute__ ((vector_size (8))); +typedef float v2sf __attribute__ ((vector_size (8))); +typedef short v4hi __attribute__ ((vector_size (8))); +typedef __fp16 v4hf __attribute__ ((vector_size (8))); +typedef char v8qi __attribute__ ((vector_size (8))); + +typedef int v4si __attribute__ ((vector_size (16))); +typedef float v4sf __attribute__ ((vector_size (16))); +typedef short v8hi __attribute__ ((vector_size (16))); +typedef __fp16 v8hf __attribute__ ((vector_size (16))); +typedef char v16qi __attribute__ ((vector_size (16))); +typedef long long v2di __attribute__ ((vector_size (16))); +typedef double v2df __attribute__ ((vector_size (16))); + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define LANE(N) (N - 1) +#else +#define LANE(N) 0 +#endif + +#define FUNC(T, E, N) \ +void \ +store_lane_##T (T x, E *y) \ +{ \ + y[0] = x[N - 1 - LANE 
(N)]; \ + y[3] = x[LANE (N)]; \ +} + +FUNC (v2si, int, 2) +FUNC (v2sf, float, 2) +FUNC (v4hi, short, 4) +FUNC (v4hf, __fp16, 4) +FUNC (v8qi, char, 8) + +FUNC (v4si, int, 4) +FUNC (v4sf, float, 4) +FUNC (v8hi, short, 8) +FUNC (v8hf, __fp16, 8) +FUNC (v16qi, char, 16) +FUNC (v2di, long long, 2) +FUNC (v2df, double, 2) + +/* When storing lane zero of a vector we can use the scalar STR instruction + that supports more addressing modes. */ + +/* { dg-final { scan-assembler-times "str\ts\[0-9\]+" 4 } } */ +/* { dg-final { scan-assembler-times "str\tb\[0-9\]+" 2 } } */ +/* { dg-final { scan-assembler-times "str\th\[0-9\]+" 4 } } */ +/* { dg-final { scan-assembler-times "str\td\[0-9\]+" 2 } } */ +/* { dg-final { scan-assembler-not "umov" } } */ +/* { dg-final { scan-assembler-not "dup" } } */ -- 2.30.2