From: Kyrylo Tkachov Date: Thu, 7 Jan 2021 14:02:02 +0000 (+0000) Subject: aarch64: Fix RTL patterns for UABA/SABA X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c9d25aa7489fd478098e0ef098438e797d597d3b;p=gcc.git aarch64: Fix RTL patterns for UABA/SABA Some time ago we changed the RTL representation of the (SU)ABD instructions in RTL to a (MINUS (MAX) (MIN)) rather than a (MINUS (ABS) (ABS)) as it more correctly models the semantics. We should do the same for the accumulation forms of these instructions: UABA/SABA. This patch does that and allows the new pattern to generate the unsigned UABA form as well. The new form also allows it to more easily be re-used to implement the relevant arm_neon.h intrinsics in the future. The testcase takes an -fno-tree-reassoc to work around a side-effect of PR98581. gcc/ * config/aarch64/aarch64-simd.md (aba<mode>_3): Rename to... (aarch64_<su>aba<mode>): ... This. Handle uaba as well. Change RTL pattern to match. gcc/testsuite/ * gcc.target/aarch64/usaba_1.c: New test. --- diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 85770c84f0a..d23398eac34 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -859,14 +859,18 @@ } ) -(define_insn "aba<mode>_3" +(define_insn "aarch64_<su>aba<mode>" [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") - (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI - (match_operand:VDQ_BHSI 1 "register_operand" "w") - (match_operand:VDQ_BHSI 2 "register_operand" "w"))) - (match_operand:VDQ_BHSI 3 "register_operand" "0")))] - "TARGET_SIMD" - "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + (plus:VDQ_BHSI (minus:VDQ_BHSI + (USMAX:VDQ_BHSI + (match_operand:VDQ_BHSI 2 "register_operand" "w") + (match_operand:VDQ_BHSI 3 "register_operand" "w")) + (<max_opp>:VDQ_BHSI + (match_dup 2) + (match_dup 3))) + (match_operand:VDQ_BHSI 1 "register_operand" "0")))] + "TARGET_SIMD" + "<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>" 
[(set_attr "type" "neon_arith_acc")] ) diff --git a/gcc/testsuite/gcc.target/aarch64/usaba_1.c b/gcc/testsuite/gcc.target/aarch64/usaba_1.c new file mode 100644 index 00000000000..58b5bebdc94 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/usaba_1.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-tree-reassoc" } */ + +#pragma GCC target "+nosve" + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +#define FUNC(T, N, S) \ +void saba_##S (T * __restrict__ a, T * __restrict__ b, T * __restrict__ c) \ +{ \ + int i; \ + for (i = 0; i < N; i++) \ + c[i] += (MAX (a[i], b[i]) - MIN (a[i], b[i])); \ +} + +FUNC (signed char, 16, qi) +/* { dg-final { scan-assembler-times {saba\tv[0-9]+\.16b, v[0-9]+\.16b, v[0-9]+\.16b} 1 } } */ +FUNC (short, 8, hi) +/* { dg-final { scan-assembler-times {saba\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h} 1 } } */ +FUNC (int, 4, si) +/* { dg-final { scan-assembler-times {saba\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s} 1 } } */ +FUNC (unsigned char, 16, uqi) +/* { dg-final { scan-assembler-times {uaba\tv[0-9]+\.16b, v[0-9]+\.16b, v[0-9]+.16b} 1 } } */ +FUNC (unsigned short, 8, uhi) +/* { dg-final { scan-assembler-times {uaba\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h} 1 } } */ +FUNC (unsigned int, 4, usi) +/* { dg-final { scan-assembler-times {uaba\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s} 1 } } */ +