From 4b40986c044a5c80bfa3edb1670104f89bc8fd6b Mon Sep 17 00:00:00 2001 From: Jackson Woodruff Date: Mon, 24 Jul 2017 11:37:09 +0000 Subject: [PATCH] [AArch64, Patch] Generate MLA when multiply + add vector by scalar (On behalf of jackson.woodruff@arm.com) This merges vector multiplies and adds into a single mla instruction when the multiplication is done by a scalar. typedef int __attribute__((vector_size(16))) vec; vec mla1(vec v0, vec v1, int v2) { return v0 + v1 * v2; } Now generates: mla1: fmov s2, w0 mla v0.4s, v1.4s, v2.s[0] This is also done for the identical case for a multiply followed by a subtract of vectors with an integer operand on the multiply. gcc/ 2017-07-24 Jackson Woodruff * config/aarch64/aarch64-simd.md (aarch64_mla_elt_merge): New. (aarch64_mls_elt_merge): Likewise. gcc/testsuite/ 2017-07-24 Jackson Woodruff * gcc.target/aarch64/simd/vmla_elem_1.c: New. From-SVN: r250475 --- gcc/ChangeLog | 5 ++ gcc/config/aarch64/aarch64-simd.md | 24 +++++++ gcc/testsuite/ChangeLog | 4 ++ .../gcc.target/aarch64/simd/vmla_elem_1.c | 67 +++++++++++++++++++ 4 files changed, 100 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vmla_elem_1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4740edbff24..4809a2180f2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2017-07-24 Jackson Woodruff + + * config/aarch64/aarch64-simd.md (aarch64_mla_elt_merge): New. + (aarch64_mls_elt_merge): Likewise. 
+ 2017-07-23 Krister Walfridsson * config.gcc (*-*-netbsd*): Remove check for NetBSD versions not diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 1cb6eeb3187..011fcec0795 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1033,6 +1033,18 @@ [(set_attr "type" "neon_mla__scalar")] ) +(define_insn "*aarch64_mla_elt_merge" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (plus:VDQHS + (mult:VDQHS (vec_duplicate:VDQHS + (match_operand: 1 "register_operand" "w")) + (match_operand:VDQHS 2 "register_operand" "w")) + (match_operand:VDQHS 3 "register_operand" "0")))] + "TARGET_SIMD" + "mla\t%0., %2., %1.[0]" + [(set_attr "type" "neon_mla__scalar")] +) + (define_insn "aarch64_mls" [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0") @@ -1080,6 +1092,18 @@ [(set_attr "type" "neon_mla__scalar")] ) +(define_insn "*aarch64_mls_elt_merge" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (minus:VDQHS + (match_operand:VDQHS 1 "register_operand" "0") + (mult:VDQHS (vec_duplicate:VDQHS + (match_operand: 2 "register_operand" "w")) + (match_operand:VDQHS 3 "register_operand" "w"))))] + "TARGET_SIMD" + "mls\t%0., %3., %2.[0]" + [(set_attr "type" "neon_mla__scalar")] +) + ;; Max/Min operations. (define_insn "3" [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b1e5730db7e..951cabb0aa5 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2017-07-24 Jackson Woodruff + + * gcc.target/aarch64/simd/vmla_elem_1.c: New. 
+ 2017-07-24 Thomas Koenig Mikael Morin diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmla_elem_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmla_elem_1.c new file mode 100644 index 00000000000..df777581ab4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmla_elem_1.c @@ -0,0 +1,67 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ + +typedef short int __attribute__ ((vector_size (16))) v8hi; + +v8hi +mla8hi (v8hi v0, v8hi v1, short int v2) +{ + /* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.8h, v\[0-9\]\+\\.8h, v\[0-9\]\+\\.h\\\[0\\\]" } } */ + return v0 + v1 * v2; +} + + +v8hi +mls8hi (v8hi v0, v8hi v1, short int v2) +{ + /* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.8h, v\[0-9\]\+\\.8h, v\[0-9\]\+\\.h\\\[0\\\]" } } */ + return v0 - v1 * v2; +} + +typedef short int __attribute__ ((vector_size (8))) v4hi; + +v4hi +mla4hi (v4hi v0, v4hi v1, short int v2) +{ + /* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.4h, v\[0-9\]\+\\.4h, v\[0-9\]\+\\.h\\\[0\\\]" } } */ + return v0 + v1 * v2; +} + +v4hi +mls4hi (v4hi v0, v4hi v1, short int v2) +{ + /* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.4h, v\[0-9\]\+\\.4h, v\[0-9\]\+\\.h\\\[0\\\]" } } */ + return v0 - v1 * v2; +} + +typedef int __attribute__ ((vector_size (16))) v4si; + +v4si +mla4si (v4si v0, v4si v1, int v2) +{ + /* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.4s, v\[0-9\]\+\\.4s, v\[0-9\]\+\\.s\\\[0\\\]" } } */ + return v0 + v1 * v2; +} + +v4si +mls4si (v4si v0, v4si v1, int v2) +{ + /* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.4s, v\[0-9\]\+\\.4s, v\[0-9\]\+\\.s\\\[0\\\]" } } */ + return v0 - v1 * v2; +} + +typedef int __attribute__((vector_size (8))) v2si; + +v2si +mla2si (v2si v0, v2si v1, int v2) +{ + /* { dg-final { scan-assembler "mla\\tv\[0-9\]\+\\.2s, v\[0-9\]\+\\.2s, v\[0-9\]\+\\.s\\\[0\\\]" } } */ + return v0 + v1 * v2; +} + +v2si +mls2si (v2si v0, v2si v1, int v2) +{ + /* { dg-final { scan-assembler "mls\\tv\[0-9\]\+\\.2s, v\[0-9\]\+\\.2s, 
v\[0-9\]\+\\.s\\\[0\\\]" } } */ + return v0 - v1 * v2; +} -- 2.30.2