From 30f8bf3d6c072a8fce14e8a003dff485a9068a97 Mon Sep 17 00:00:00 2001
From: Richard Sandiford
Date: Tue, 10 Dec 2019 16:32:40 +0000
Subject: [PATCH] [AArch64] Fix INDEX patterns for partial VNx2 modes

The INDEX patterns handle partial modes by choosing the container
size rather than the element size, so that the number of lanes (and
thus number of additions) matches the mode.  This means that all
VNx4 modes use .s and all VNx2 modes use .d, etc.

When adding this, I'd forgotten that the choice between Wn and Xn
registers would need to be updated to use the container size too.
For partial VNx2s, we were using .d containers with Wn rather than
Xn source registers.

2019-12-10  Richard Sandiford

gcc/
	* config/aarch64/iterators.md (vccore): New iterator.
	* config/aarch64/aarch64-sve.md (vec_series<mode>): Use it instead
	of vwcore.
	(*vec_series<mode>_plus): Likewise.

gcc/testsuite/
	* gcc.target/aarch64/sve/mixed_size_6.c: New test.

From-SVN: r279173
---
 gcc/ChangeLog                                 |  7 +++
 gcc/config/aarch64/aarch64-sve.md             |  8 ++--
 gcc/config/aarch64/iterators.md               |  6 +++
 gcc/testsuite/ChangeLog                       |  4 ++
 .../gcc.target/aarch64/sve/mixed_size_6.c     | 47 +++++++++++++++++++
 5 files changed, 68 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/mixed_size_6.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f559d3a6f83..577acb18d78 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2019-12-10  Richard Sandiford
+
+	* config/aarch64/iterators.md (vccore): New iterator.
+	* config/aarch64/aarch64-sve.md (vec_series<mode>): Use it instead
+	of vwcore.
+	(*vec_series<mode>_plus): Likewise.
+
 2019-12-10  Frederik Harwath
 
 	* omp-low.c (scan_omp_for): Use clause location in warning.
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 4427609b579..1d9cdad23bd 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -2541,9 +2541,9 @@
 	  (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
   "TARGET_SVE"
   "@
-   index\t%0.<Vctype>, #%1, %<vwcore>2
-   index\t%0.<Vctype>, %<vwcore>1, #%2
-   index\t%0.<Vctype>, %<vwcore>1, %<vwcore>2"
+   index\t%0.<Vctype>, #%1, %<vccore>2
+   index\t%0.<Vctype>, %<vccore>1, #%2
+   index\t%0.<Vctype>, %<vccore>1, %<vccore>2"
 )
 
 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
@@ -2557,7 +2557,7 @@
   "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
   {
     operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
-    return "index\t%0.<Vctype>, %<vwcore>1, #%2";
+    return "index\t%0.<Vctype>, %<vccore>1, #%2";
   }
 )
 
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 83a0d156e84..1ca5ed1ef1b 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1093,6 +1093,12 @@
 			  (VNx2DI "x")
 			  (VNx2DF "x")])
 
+;; Like vwcore, but for the container mode rather than the element mode.
+(define_mode_attr vccore [(VNx16QI "w") (VNx8QI "w") (VNx4QI "w") (VNx2QI "x")
+			  (VNx8HI "w") (VNx4HI "w") (VNx2HI "x")
+			  (VNx4SI "w") (VNx2SI "x")
+			  (VNx2DI "x")])
+
 ;; Double vector types for ALLX.
 (define_mode_attr Vallxd [(QI "8b") (HI "4h") (SI "2s")])
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e2454a65da2..c1f22f2c28e 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2019-12-10  Richard Sandiford
+
+	* gcc.target/aarch64/sve/mixed_size_6.c: New test.
+
 2019-12-10  Frederik Harwath
 
 	* c-c++-common/goacc/clause-locations.c: New test.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_6.c b/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_6.c
new file mode 100644
index 00000000000..837edecf7d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mixed_size_6.c
@@ -0,0 +1,47 @@
+/* { dg-options "-O3 -msve-vector-bits=256" } */
+/* INDEX on partial VNx2 modes must take Xn, not Wn, scalar operands.  */
+#include <stdint.h>
+
+void
+f1 (uint64_t *restrict ptr1, uint8_t *restrict ptr2, uint8_t start)
+{
+#pragma GCC unroll 0
+  for (int i = 0; i < 4; ++i)
+    {
+      ptr1[i] = 10;
+      ptr2[i] = start;
+      start += 1;
+    }
+}
+
+void
+f2 (uint64_t *restrict ptr1, uint16_t *restrict ptr2, uint16_t start)
+{
+#pragma GCC unroll 0
+  for (int i = 0; i < 4; ++i)
+    {
+      ptr1[i] = 10;
+      ptr2[i] = start;
+      start += 2;
+    }
+}
+
+void
+f3 (uint64_t *restrict ptr1, uint32_t *restrict ptr2, uint32_t start)
+{
+#pragma GCC unroll 0
+  for (int i = 0; i < 4; ++i)
+    {
+      ptr1[i] = 10;
+      ptr2[i] = start;
+      start += 4;
+    }
+}
+
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.d, x[0-9]+, #1\n} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.d, x[0-9]+, #2\n} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.d, x[0-9]+, #4\n} } } */
+
+/* { dg-final { scan-assembler-not {\tindex\tz[0-9]+\.d, w[0-9]+, #1\n} } } */
+/* { dg-final { scan-assembler-not {\tindex\tz[0-9]+\.d, w[0-9]+, #2\n} } } */
+/* { dg-final { scan-assembler-not {\tindex\tz[0-9]+\.d, w[0-9]+, #4\n} } } */
-- 
2.30.2