From 0a3eccb6ef9351cf8668eede8060dd7481794cd2 Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Mon, 1 Feb 2021 13:50:43 +0000 Subject: [PATCH] AArch64: Change canonicalization of smlal and smlsl in order to be able to optimize the vec_dup g:87301e3956d44ad45e384a8eb16c79029d20213a and g:ee4c4fe289e768d3c6b6651c8bfa3fdf458934f4 changed the intrinsics to be proper RTL but accidentally ended up creating a regression because of the ordering in the RTL pattern. The existing RTL patterns that combine should try to match to remove the vec_dup are aarch64_vec_mlal_lane and aarch64_vec_mult_lane, which expect the select register to be the second operand of mult. The pattern introduced has it as the first operand so combine was unable to remove the vec_dup. This flips the order such that the patterns optimize correctly. gcc/ChangeLog: * config/aarch64/aarch64-simd.md (aarch64_mlal_n, aarch64_mlsl, aarch64_mlsl_n): Flip mult operands. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd-intrinsics/smlal-smlsl-mull-optimized.c: New test. 
--- gcc/config/aarch64/aarch64-simd.md | 18 ++++---- .../smlal-smlsl-mull-optimized.c | 45 +++++++++++++++++++ 2 files changed, 54 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/smlal-smlsl-mull-optimized.c diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index bca2d8a3437..d1858663a4e 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1950,10 +1950,10 @@ (plus: (mult: (ANY_EXTEND: - (vec_duplicate:VD_HSI - (match_operand: 3 "register_operand" ""))) + (match_operand:VD_HSI 2 "register_operand" "w")) (ANY_EXTEND: - (match_operand:VD_HSI 2 "register_operand" "w"))) + (vec_duplicate:VD_HSI + (match_operand: 3 "register_operand" "")))) (match_operand: 1 "register_operand" "0")))] "TARGET_SIMD" "mlal\t%0., %2., %3.[0]" @@ -1980,10 +1980,10 @@ (match_operand: 1 "register_operand" "0") (mult: (ANY_EXTEND: - (vec_duplicate:VD_HSI - (match_operand: 3 "register_operand" ""))) + (match_operand:VD_HSI 2 "register_operand" "w")) (ANY_EXTEND: - (match_operand:VD_HSI 2 "register_operand" "w")))))] + (vec_duplicate:VD_HSI + (match_operand: 3 "register_operand" ""))))))] "TARGET_SIMD" "mlsl\t%0., %2., %3.[0]" [(set_attr "type" "neon_mla__long")] @@ -2078,10 +2078,10 @@ [(set (match_operand: 0 "register_operand" "=w") (mult: (ANY_EXTEND: - (vec_duplicate: - (match_operand: 2 "register_operand" ""))) + (match_operand:VD_HSI 1 "register_operand" "w")) (ANY_EXTEND: - (match_operand:VD_HSI 1 "register_operand" "w"))))] + (vec_duplicate: + (match_operand: 2 "register_operand" "")))))] "TARGET_SIMD" "mull\t%0., %1., %2.[0]" [(set_attr "type" "neon_mul__scalar_long")] diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/smlal-smlsl-mull-optimized.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/smlal-smlsl-mull-optimized.c new file mode 100644 index 00000000000..1e963e5002e --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/smlal-smlsl-mull-optimized.c @@ -0,0 +1,45 @@ +/* { dg-do compile { target aarch64-*-* } } */ + +#include <arm_neon.h> + +/* +**add: +** smlal v0.4s, v1.4h, v2.h[3] +** ret +*/ + +int32x4_t add(int32x4_t acc, int16x4_t b, int16x4_t c) { + return vmlal_n_s16(acc, b, c[3]); +} + +/* +**sub: +** smlsl v0.4s, v1.4h, v2.h[3] +** ret +*/ + +int32x4_t sub(int32x4_t acc, int16x4_t b, int16x4_t c) { + return vmlsl_n_s16(acc, b, c[3]); +} + +/* +**smull: +** smull v0.4s, v1.4h, v2.h[3] +** ret +*/ + +int32x4_t smull(int16x4_t b, int16x4_t c) { + return vmull_n_s16(b, c[3]); +} + +/* +**umull: +** umull v0.4s, v1.4h, v2.h[3] +** ret +*/ + +uint32x4_t umull(uint16x4_t b, uint16x4_t c) { + return vmull_n_u16(b, c[3]); +} + +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" {-O[^0]} } } */ -- 2.30.2