From c0c2f013906a695b8a02226f119649a370d9e083 Mon Sep 17 00:00:00 2001 From: Yuliang Wang Date: Mon, 30 Sep 2019 16:55:45 +0000 Subject: [PATCH] [AArch64][SVE] Utilize ASRD instruction for division and remainder 2019-09-30 Yuliang Wang gcc/ * config/aarch64/aarch64-sve.md (sdiv_pow2<mode>3): New pattern for ASRD. * config/aarch64/iterators.md (UNSPEC_ASRD): New unspec. * internal-fn.def (IFN_DIV_POW2): New internal function. * optabs.def (sdiv_pow2_optab): New optab. * tree-vect-patterns.c (vect_recog_divmod_pattern): Modify pattern to support new operation. * doc/md.texi (sdiv_pow2@var{m3}): Documentation for the above. * doc/sourcebuild.texi (vect_sdiv_pow2_si): Document new target selector. gcc/testsuite/ * gcc.dg/vect/vect-sdiv-pow2-1.c: New test. * gcc.target/aarch64/sve/asrdiv_1.c: As above. * lib/target-supports.exp (check_effective_target_vect_sdiv_pow2_si): Return true for AArch64 with SVE. From-SVN: r276343 --- gcc/ChangeLog | 13 +++ gcc/config/aarch64/aarch64-sve.md | 41 ++++++++++ gcc/config/aarch64/iterators.md | 1 + gcc/doc/md.texi | 11 +++ gcc/doc/sourcebuild.texi | 4 + gcc/internal-fn.def | 2 + gcc/optabs.def | 1 + gcc/testsuite/ChangeLog | 7 ++ gcc/testsuite/gcc.dg/vect/vect-sdiv-pow2-1.c | 79 +++++++++++++++++++ .../gcc.target/aarch64/sve/asrdiv_1.c | 51 ++++++++++++ gcc/testsuite/lib/target-supports.exp | 8 ++ gcc/tree-vect-patterns.c | 32 +++++++- 12 files changed, 249 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/vect/vect-sdiv-pow2-1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/asrdiv_1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c3f77270629..78d296d228f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2019-09-30 Yuliang Wang + + * config/aarch64/aarch64-sve.md (sdiv_pow2<mode>3): + New pattern for ASRD. + * config/aarch64/iterators.md (UNSPEC_ASRD): New unspec. + * internal-fn.def (IFN_DIV_POW2): New internal function. + * optabs.def (sdiv_pow2_optab): New optab.
+ * tree-vect-patterns.c (vect_recog_divmod_pattern): + Modify pattern to support new operation. + * doc/md.texi (sdiv_pow2@var{m3}): Documentation for the above. + * doc/sourcebuild.texi (vect_sdiv_pow2_si): + Document new target selector. + 2019-09-30 Richard Sandiford * config/aarch64/aarch64.c (aarch64_layout_frame): Use crtl->abi diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index f58353e9c6d..41c8689cefd 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -71,6 +71,7 @@ ;; ---- [INT] Binary logical operations ;; ---- [INT] Binary logical operations (inverted second input) ;; ---- [INT] Shifts +;; ---- [INT] Shifts (rounding towards 0) ;; ---- [FP] General binary arithmetic corresponding to rtx codes ;; ---- [FP] General binary arithmetic corresponding to unspecs ;; ---- [FP] Addition @@ -2563,6 +2564,46 @@ [(set_attr "movprfx" "yes")] ) +;; ------------------------------------------------------------------------- +;; ---- [INT] Shifts (rounding towards 0) +;; ------------------------------------------------------------------------- +;; Includes: +;; - ASRD +;; ------------------------------------------------------------------------- + +;; Unpredicated arithmetic right shift for division by power-of-2. +(define_expand "sdiv_pow2<mode>3" + [(set (match_operand:SVE_I 0 "register_operand") + (unspec:SVE_I + [(match_dup 3) + (unspec:SVE_I + [(match_operand:SVE_I 1 "register_operand") + (match_operand 2 "aarch64_simd_rshift_imm")] + UNSPEC_ASRD)] + UNSPEC_PRED_X))] + "TARGET_SVE" + { + operands[3] = aarch64_ptrue_reg (<VPRED>mode); + } +) + +;; Predicated ASRD with PTRUE.
+(define_insn "*sdiv_pow2<mode>3" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") + (unspec:SVE_I + [(match_operand:SVE_I 2 "register_operand" "0, w") + (match_operand 3 "aarch64_simd_rshift_imm")] + UNSPEC_ASRD)] + UNSPEC_PRED_X))] + "TARGET_SVE" + "@ + asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 + movprfx\t%0, %2\;asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3" + [(set_attr "movprfx" "*,yes")] +) + ;; ------------------------------------------------------------------------- ;; ---- [FP] General binary arithmetic corresponding to rtx codes ;; ------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 03b3ce36302..1e321af710b 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -538,6 +538,7 @@ UNSPEC_SMULHRS ; Used in aarch64-sve2.md. UNSPEC_UMULHS ; Used in aarch64-sve2.md. UNSPEC_UMULHRS ; Used in aarch64-sve2.md. + UNSPEC_ASRD ; Used in aarch64-sve.md. ]) ;; ------------------------------------------------------------------ diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index f35fd2b1b19..868016a3107 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5414,6 +5414,17 @@ op0 = (narrow) (((((wide) op1 * (wide) op2) >> (N / 2 - 2)) + 1) >> 1); where the sign of @samp{narrow} determines whether this is a signed or unsigned operation, and @var{N} is the size of @samp{wide} in bits. +@cindex @code{sdiv_pow2@var{m3}} instruction pattern +@item @samp{sdiv_pow2@var{m3}} +@cindex @code{sdiv_pow2@var{m3}} instruction pattern +@itemx @samp{sdiv_pow2@var{m3}} +Signed division by power-of-2 immediate.
Equivalent to: +@smallexample +signed op0, op1; +@dots{} +op0 = op1 / (1 << imm); +@end smallexample + @cindex @code{vec_shl_insert_@var{m}} instruction pattern @item @samp{vec_shl_insert_@var{m}} Shift the elements in vector input operand 1 left one element (i.e.@: diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index 9b98f013263..56967928de6 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -1446,6 +1446,10 @@ of bytes. Target supports both signed and unsigned multiply-high-with-round-and-scale operations on vectors of half-words. +@item vect_sdiv_pow2_si +Target supports signed division by constant power-of-2 operations +on vectors of 4-byte integers. + @item vect_condition Target supports vector conditional operations. diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 49f57978c88..a9459449fee 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -140,6 +140,8 @@ DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while) DEF_INTERNAL_OPTAB_FN (VEC_SHL_INSERT, ECF_CONST | ECF_NOTHROW, vec_shl_insert, binary) +DEF_INTERNAL_OPTAB_FN (DIV_POW2, ECF_CONST | ECF_NOTHROW, sdiv_pow2, binary) + DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary) DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary) DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary) diff --git a/gcc/optabs.def b/gcc/optabs.def index 308696846d4..e9373158fc0 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -347,6 +347,7 @@ OPTAB_D (smulhs_optab, "smulhs$a3") OPTAB_D (smulhrs_optab, "smulhrs$a3") OPTAB_D (umulhs_optab, "umulhs$a3") OPTAB_D (umulhrs_optab, "umulhrs$a3") +OPTAB_D (sdiv_pow2_optab, "sdiv_pow2$a3") OPTAB_D (vec_pack_sfix_trunc_optab, "vec_pack_sfix_trunc_$a") OPTAB_D (vec_pack_ssat_optab, "vec_pack_ssat_$a") OPTAB_D (vec_pack_trunc_optab, "vec_pack_trunc_$a") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ce3a9679356..cc31ac8cd5f 100644 --- a/gcc/testsuite/ChangeLog +++ 
b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2019-09-30 Yuliang Wang + + * gcc.dg/vect/vect-sdiv-pow2-1.c: New test. + * gcc.target/aarch64/sve/asrdiv_1.c: As above. + * lib/target-supports.exp (check_effective_target_vect_sdiv_pow2_si): + Return true for AArch64 with SVE. + 2019-09-30 Richard Sandiford * gcc.target/aarch64/torture/simd-abi-9.c: New test. diff --git a/gcc/testsuite/gcc.dg/vect/vect-sdiv-pow2-1.c b/gcc/testsuite/gcc.dg/vect/vect-sdiv-pow2-1.c new file mode 100644 index 00000000000..be70bc6c47e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-sdiv-pow2-1.c @@ -0,0 +1,79 @@ +/* { dg-require-effective-target vect_int } */ + +#include "tree-vect.h" + +#define DIV(x,y) ((x)/(y)) +#define MOD(x,y) ((x)%(y)) + +#define TEMPLATE(PO2,OP) \ +void __attribute__ ((noipa)) \ +f_##PO2##_##OP (int *restrict a, int *restrict b, __INTPTR_TYPE__ n) \ +{ \ + for (__INTPTR_TYPE__ i = 0; i < n; ++i) \ + a[i] = OP (b[i], (1 << PO2)); \ +} +#define TEMPLATES(PO2) \ +TEMPLATE (PO2,DIV); \ +TEMPLATE (PO2,MOD); + +TEMPLATES (1); +TEMPLATES (2); +TEMPLATES (3); +TEMPLATES (7); +TEMPLATES (8); +TEMPLATES (10); +TEMPLATES (15); +TEMPLATES (16); +TEMPLATES (20); + +typedef void (*func_t) (int *, int *, __INTPTR_TYPE__); +typedef struct { + int po2; + func_t div; + func_t mod; +} fn_t; +const fn_t fns[] = { +#define FN_PAIR(PO2) { PO2, f_##PO2##_DIV, f_##PO2##_MOD } + FN_PAIR (1), + FN_PAIR (2), + FN_PAIR (3), + FN_PAIR (7), + FN_PAIR (8), + FN_PAIR (10), + FN_PAIR (15), + FN_PAIR (16), + FN_PAIR (20), +}; + +int __attribute__ ((noipa, noinline)) +power2 (int x) +{ + return 1 << x; +} + +#define N 50 + +int +main (void) +{ + int a[N], b[N], c[N]; + + for (int i = 0; i < (sizeof(fns)/sizeof(fns[0])); i++) + { + int p = power2 (fns[i].po2); + for (int j = 0; j < N; j++) + a[j] = ((p << 4) * j) / (N - 1) - (p << 5); + + fns[i].div (b, a, N); + fns[i].mod (c, a, N); + + for (int j = 0; j < N; j++) + if (a[j] != (b[j] * p + c[j])) + __builtin_abort (); + } + + return 0; +} + +/* { 
dg-final { scan-tree-dump {\.DIV_POW2} "vect" { target vect_sdiv_pow2_si } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 18 "vect" { target vect_sdiv_pow2_si } } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/asrdiv_1.c b/gcc/testsuite/gcc.target/aarch64/sve/asrdiv_1.c new file mode 100644 index 00000000000..615d8b885ed --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/asrdiv_1.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */ + +#include <stdint.h> + +#define SIGNED(S) int##S##_t + +#define DIV(x,y) ((x)/(y)) +#define MOD(x,y) ((x)%(y)) + +#define TEMPLATE(OP,SIZE) \ +void __attribute__ ((noinline, noclone)) \ +f_##OP##_##SIZE (SIGNED(SIZE) *restrict a, SIGNED(SIZE) *restrict b, \ + __INTPTR_TYPE__ n) \ +{ \ + for (__INTPTR_TYPE__ i = 0; i < n; ++i) \ + a[i] = OP (b[i], ((SIGNED(SIZE))1 << ((SIZE)/2+1))); \ +} +#define DIVMOD(SIZE) \ +TEMPLATE (DIV,SIZE); \ +TEMPLATE (MOD,SIZE); + +DIVMOD (8); +DIVMOD (16); +DIVMOD (32); +DIVMOD (64); + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 8 "vect" } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tasrd\tz[0-9]+\.b, p[0-9]+/m, z[0-9]+\.b, #5\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tasrd\tz[0-9]+\.h, p[0-9]+/m, z[0-9]+\.h, #9\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #9\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tasrd\tz[0-9]+\.s, p[0-9]+/m, z[0-9]+\.s, #17\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #17\n} 1 } } */ +/* { dg-final { scan-assembler-times
{\tsub\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tasrd\tz[0-9]+\.d, p[0-9]+/m, z[0-9]+\.d, #33\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #33\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ + +/* { dg-final { scan-assembler-not {\tasr\t%} } } */ +/* { dg-final { scan-assembler-not {\tlsr\t%} } } */ +/* { dg-final { scan-assembler-not {\tcmplt\t%} } } */ +/* { dg-final { scan-assembler-not {\tand\t%} } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 0268acd91d8..a7b76b69b75 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -6256,6 +6256,14 @@ proc check_effective_target_vect_mulhrs_hi {} { && [check_effective_target_aarch64_sve2] }] } +# Return 1 if the target plus current options supports signed division +# by power-of-2 operations on vectors of 4-byte integers. + +proc check_effective_target_vect_sdiv_pow2_si {} { + return [expr { [istarget aarch64*-*-*] + && [check_effective_target_aarch64_sve] }] +} + # Return 1 if the target plus current options supports a vector # demotion (packing) of shorts (to chars) and ints (to shorts) # using modulo arithmetic, 0 otherwise. diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index baa9a4cb8fa..4dfebbefce6 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -2927,6 +2927,37 @@ vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out) /* Pattern detected. */ vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt); + *type_out = vectype; + + /* Check if the target supports this internal function. 
*/ + internal_fn ifn = IFN_DIV_POW2; + if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED)) + { + tree shift = build_int_cst (itype, tree_log2 (oprnd1)); + + tree var_div = vect_recog_temp_ssa_var (itype, NULL); + gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift); + gimple_call_set_lhs (div_stmt, var_div); + + if (rhs_code == TRUNC_MOD_EXPR) + { + append_pattern_def_seq (stmt_vinfo, div_stmt); + def_stmt + = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), + LSHIFT_EXPR, var_div, shift); + append_pattern_def_seq (stmt_vinfo, def_stmt); + pattern_stmt + = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), + MINUS_EXPR, oprnd0, + gimple_assign_lhs (def_stmt)); + } + else + pattern_stmt = div_stmt; + gimple_set_location (pattern_stmt, gimple_location (last_stmt)); + + return pattern_stmt; + } + cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0)); if (rhs_code == TRUNC_DIV_EXPR @@ -3003,7 +3034,6 @@ vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out) signmask); } - *type_out = vectype; return pattern_stmt; } -- 2.30.2