From b81fbfe1eb5759999c69349d6291d27444585fee Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 11 Jan 2021 18:03:20 +0000 Subject: [PATCH] aarch64: Add support for unpacked SVE shifts This patch adds support for unpacked SVE LSL, ASR and LSR. For right shifts, the type suffix needs to be taken from the element size rather than the container size. gcc/ * config/aarch64/aarch64-sve.md (<ASHIFT:optab><mode>3) (v<optab><mode>3, @aarch64_pred_<optab><mode>) (*post_ra_v<optab><mode>3): Extend from SVE_FULL_I to SVE_I. gcc/testsuite/ * gcc.target/aarch64/sve/shift_2.c: New test. --- gcc/config/aarch64/aarch64-sve.md | 36 ++++----- .../gcc.target/aarch64/sve/shift_2.c | 81 +++++++++++++++++++ 2 files changed, 99 insertions(+), 18 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/shift_2.c diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 2f5a5e3c914..a58324da869 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -4500,9 +4500,9 @@ ;; Unpredicated shift by a scalar, which expands into one of the vector ;; shifts below. (define_expand "<ASHIFT:optab><mode>3" - [(set (match_operand:SVE_FULL_I 0 "register_operand") - (ASHIFT:SVE_FULL_I - (match_operand:SVE_FULL_I 1 "register_operand") + [(set (match_operand:SVE_I 0 "register_operand") + (ASHIFT:SVE_I + (match_operand:SVE_I 1 "register_operand") (match_operand:<VEL> 2 "general_operand")))] "TARGET_SVE" { @@ -4527,12 +4527,12 @@ ;; Unpredicated shift by a vector. 
(define_expand "v<optab><mode>3" - [(set (match_operand:SVE_FULL_I 0 "register_operand") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand") + (unspec:SVE_I [(match_dup 3) - (ASHIFT:SVE_FULL_I - (match_operand:SVE_FULL_I 1 "register_operand") - (match_operand:SVE_FULL_I 2 "aarch64_sve_<lr>shift_operand"))] + (ASHIFT:SVE_I + (match_operand:SVE_I 1 "register_operand") + (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))] UNSPEC_PRED_X))] "TARGET_SVE" { @@ -4545,12 +4545,12 @@ ;; likely to gain much and would make the instruction seem less uniform ;; to the register allocator. (define_insn_and_split "@aarch64_pred_<optab><mode>" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w") + (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") - (ASHIFT:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w") - (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))] + (ASHIFT:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, 0, w, w") + (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))] UNSPEC_PRED_X))] "TARGET_SVE" "@ @@ -4560,7 +4560,7 @@ movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" "&& reload_completed && !register_operand (operands[3], <MODE>mode)" - [(set (match_dup 0) (ASHIFT:SVE_FULL_I (match_dup 2) (match_dup 3)))] + [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))] "" [(set_attr "movprfx" "*,*,*,yes")] ) @@ -4569,10 +4569,10 @@ ;; These are generated by splitting a predicated instruction whose ;; predicate is unused. 
(define_insn "*post_ra_v<optab><mode>3" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") - (ASHIFT:SVE_FULL_I - (match_operand:SVE_FULL_I 1 "register_operand" "w") - (match_operand:SVE_FULL_I 2 "aarch64_simd_<lr>shift_imm")))] + [(set (match_operand:SVE_I 0 "register_operand" "=w") + (ASHIFT:SVE_I + (match_operand:SVE_I 1 "register_operand" "w") + (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))] "TARGET_SVE && reload_completed" "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2" ) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/shift_2.c b/gcc/testsuite/gcc.target/aarch64/sve/shift_2.c new file mode 100644 index 00000000000..b7462c47db9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/shift_2.c @@ -0,0 +1,81 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include <stdint.h> + +#define TEST_SHIFT_IMM(TYPE, NAME, OP, AMT) \ + TYPE NAME##_##TYPE##_##AMT (TYPE a) { return a OP AMT; } + +#define TEST_SHIFT(TYPE, NAME, OP, LIMIT) \ + TYPE NAME##_##TYPE##_reg (TYPE a, TYPE b) { return a OP b; } \ + TEST_SHIFT_IMM (TYPE, NAME, OP, 1) \ + TEST_SHIFT_IMM (TYPE, NAME, OP, 5) \ + TEST_SHIFT_IMM (TYPE, NAME, OP, LIMIT) + +#define TEST_TYPE(TYPE, SIZE, LIMIT) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_SHIFT (TYPE##SIZE, shl, <<, LIMIT) \ + TEST_SHIFT (TYPE##SIZE, shr, >>, LIMIT) \ + +TEST_TYPE (int8_t, 32, 7) +TEST_TYPE (uint8_t, 32, 7) + +TEST_TYPE (int8_t, 64, 7) +TEST_TYPE (uint8_t, 64, 7) +TEST_TYPE (int16_t, 64, 15) +TEST_TYPE (uint16_t, 64, 15) + +TEST_TYPE (int8_t, 128, 7) +TEST_TYPE (uint8_t, 128, 7) +TEST_TYPE (int16_t, 128, 15) +TEST_TYPE (uint16_t, 128, 15) +TEST_TYPE (int32_t, 128, 31) +TEST_TYPE (uint32_t, 128, 31) + +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, 
z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, 
z[0-9]+\.b, #5\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 1 } } */ -- 2.30.2