From 27842e2a1eb26a7eae80b8efd98fb8c8bd74a68e Mon Sep 17 00:00:00 2001
From: Joel Hutton
Date: Thu, 19 Nov 2020 10:43:53 +0000
Subject: [PATCH] [3/3] [AArch64][vect] vec_widen_lshift pattern

Add the aarch64 vec_widen_<sur>shiftl_lo/hi patterns and fix the bug they
trigger in the mid-end.  Each pattern takes one vector with N elements of
size S, shifts each element left by the element width and stores the
results as N elements of size 2*S (in 2 result vectors).  The aarch64
backend implements this with the shll, shll2 instruction pair.

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md: Add vec_widen_<sur>shiftl_hi/lo
	patterns.
	* tree-vect-stmts.c (vectorizable_conversion): Fix for widen_lshift
	case.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/vect-widen-lshift.c: New test.
---
NOTE(review): the copy of this patch under review had lost its line breaks,
indentation and every <...> token (md iterators/attributes, #include header
names, the sender address).  The layout and the <...> tokens below are
reconstructed -- confirm them against the tree before applying.  The test
was also changed to compare the whole result arrays (sizeof foo_arr rather
than ARR_SIZE bytes) and to use signed arrays matching the sshll_*
prototypes.

 gcc/config/aarch64/aarch64-simd.md            | 66 +++++++++++++++++++
 .../gcc.target/aarch64/vect-widen-lshift.c    | 62 +++++++++++++++++
 gcc/tree-vect-stmts.c                         |  5 +-
 3 files changed, 131 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e8c951fe55e..68baf416045 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4664,8 +4664,74 @@
   [(set_attr "type" "neon_sat_shift_reg<q>")]
 )
 
+(define_expand "vec_widen_<sur>shiftl_lo_<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
+			 (match_operand:SI 2
+			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
+			 VSHLL))]
+  "TARGET_SIMD"
+  {
+    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
+    emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],
+						     p, operands[2]));
+    DONE;
+  }
+)
+
+(define_expand "vec_widen_<sur>shiftl_hi_<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand")
+	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
+			 (match_operand:SI 2
+			   "immediate_operand" "i")]
+			 VSHLL))]
+  "TARGET_SIMD"
+  {
+    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
+    emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],
+						      p, operands[2]));
+    DONE;
+  }
+)
+
 ;; vshll_n
 
+(define_insn "aarch64_<sur>shll<mode>_internal"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+	(unspec:<VWIDE> [(vec_select:<VHALF>
+			    (match_operand:VQW 1 "register_operand" "w")
+			    (match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
+			 (match_operand:SI 3
+			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
+			 VSHLL))]
+  "TARGET_SIMD"
+  {
+    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
+      return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
+    else
+      return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
+  }
+  [(set_attr "type" "neon_shift_imm_long")]
+)
+
+(define_insn "aarch64_<sur>shll2<mode>_internal"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+	(unspec:<VWIDE> [(vec_select:<VHALF>
+			    (match_operand:VQW 1 "register_operand" "w")
+			    (match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
+			 (match_operand:SI 3
+			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
+			 VSHLL))]
+  "TARGET_SIMD"
+  {
+    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
+      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
+    else
+      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
+  }
+  [(set_attr "type" "neon_shift_imm_long")]
+)
+
 (define_insn "aarch64_<sur>shll_n<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
 	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c b/gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c
new file mode 100644
index 00000000000..48a3719d4ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -save-temps" } */
+#include <stdint.h>
+#include <string.h>
+
+#pragma GCC target "+nosve"
+
+#define ARR_SIZE 1024
+
+/* Should produce an shll, shll2 pair.  */
+void sshll_opt (int32_t *foo, int16_t *a, int16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
+    {
+        foo[i] = a[i] << 16;
+        foo[i+1] = a[i+1] << 16;
+        foo[i+2] = a[i+2] << 16;
+        foo[i+3] = a[i+3] << 16;
+    }
+}
+
+__attribute__((optimize (0)))
+void sshll_nonopt (int32_t *foo, int16_t *a, int16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
+    {
+        foo[i] = a[i] << 16;
+        foo[i+1] = a[i+1] << 16;
+        foo[i+2] = a[i+2] << 16;
+        foo[i+3] = a[i+3] << 16;
+    }
+}
+
+
+void __attribute__((optimize (0)))
+init(int16_t *a, int16_t *b)
+{
+    for( int i = 0; i < ARR_SIZE;i++)
+    {
+      a[i] = i;
+      b[i] = 2*i;
+    }
+}
+
+int __attribute__((optimize (0)))
+main()
+{
+    int32_t foo_arr[ARR_SIZE];
+    int32_t bar_arr[ARR_SIZE];
+    int16_t a[ARR_SIZE];
+    int16_t b[ARR_SIZE];
+
+    init(a, b);
+    sshll_opt(foo_arr, a, b);
+    sshll_nonopt(bar_arr, a, b);
+    if (memcmp(foo_arr, bar_arr, sizeof foo_arr) != 0)
+      return 1;
+    return 0;
+}
+
+/* { dg-final { scan-assembler-times {\tshll\t} 1} } */
+/* { dg-final { scan-assembler-times {\tshll2\t} 1} } */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index f88f07a62df..a4980a931a9 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -4935,8 +4935,9 @@ vectorizable_conversion (vec_info *vinfo,
 			 &vec_oprnds1);
       if (code == WIDEN_LSHIFT_EXPR)
 	{
-	  vec_oprnds1.create (ncopies * ninputs);
-	  for (i = 0; i < ncopies * ninputs; ++i)
+	  int oprnds_size = vec_oprnds0.length ();
+	  vec_oprnds1.create (oprnds_size);
+	  for (i = 0; i < oprnds_size; ++i)
 	    vec_oprnds1.quick_push (op1);
 	}
       /* Arguments are ready.  Create the new vector stmts.  */
-- 
2.30.2