From b41e6dd50f329b0291457e939d4c0dacd81c82c1 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 4 Jan 2021 11:59:07 +0000 Subject: [PATCH] aarch64: Improve vcombine codegen [PR89057] This patch fixes a codegen regression in the handling of things like: __temp.val[0] \ = vcombine_##funcsuffix (__b.val[0], \ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ in the 64-bit vst[234] functions. The zero was forced into a register at expand time, and we relied on combine to fuse the zero and combine back together into a single combinez pattern. The problem is that the zero could be hoisted before combine gets a chance to do its thing. gcc/ PR target/89057 * config/aarch64/aarch64-simd.md (aarch64_combine<mode>): Accept aarch64_simd_reg_or_zero for operand 2. Use the combinez patterns to handle zero operands. gcc/testsuite/ PR target/89057 * gcc.target/aarch64/pr89057.c: New test. --- gcc/config/aarch64/aarch64-simd.md | 15 ++++++++++++--- gcc/testsuite/gcc.target/aarch64/pr89057.c | 16 ++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/pr89057.c diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index c4e3b896295..85770c84f0a 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -3340,11 +3340,20 @@ (define_expand "aarch64_combine<mode>" [(match_operand:<VDBL> 0 "register_operand") (match_operand:VDC 1 "register_operand") - (match_operand:VDC 2 "register_operand")] + (match_operand:VDC 2 "aarch64_simd_reg_or_zero")] "TARGET_SIMD" { - aarch64_split_simd_combine (operands[0], operands[1], operands[2]); - + if (operands[2] == CONST0_RTX (<MODE>mode)) + { + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_combinez_be<mode> (operands[0], operands[1], + operands[2])); + else + emit_insn (gen_aarch64_combinez<mode> (operands[0], operands[1], + operands[2])); + } + else + aarch64_split_simd_combine (operands[0], operands[1], operands[2]); DONE; } ) diff --git 
a/gcc/testsuite/gcc.target/aarch64/pr89057.c b/gcc/testsuite/gcc.target/aarch64/pr89057.c new file mode 100644 index 00000000000..1e200245ddd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr89057.c @@ -0,0 +1,16 @@ +/* { dg-options "-O3" } */ + +#include <arm_neon.h> + +void +f (int32_t *dst, int32_t *src, int n) +{ + for (int i = 0; i < n; ++i) + { + int32x2x3_t a = vld3_s32 (src + i * 6); + int32x2x3_t b = { a.val[2], a.val[1], a.val[0] }; + vst3_s32 (dst + i * 6, b); + } +} + +/* { dg-final { scan-assembler-not {\tins\t} } } */ -- 2.30.2