From: Prathamesh Kulkarni
Date: Mon, 9 Dec 2019 09:59:42 +0000 (+0000)
Subject: re PR tree-optimization/89007 ([SVE] Implement generic vector average expansion)
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=8e03b21e84e4b6a3c806bb48bb5eceac1f7ed273;p=gcc.git

re PR tree-optimization/89007 ([SVE] Implement generic vector average expansion)

2019-12-09  Prathamesh Kulkarni

	PR tree-optimization/89007
	* tree-vect-patterns.c (vect_recog_average_pattern): If there is no
	target support available, generate code to distribute rshift over plus
	and add a carry.

testsuite/
	* gcc.target/aarch64/sve/pr89007-1.c: New test.
	* gcc.target/aarch64/sve/pr89007-2.c: Likewise.

From-SVN: r279112
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0d421ca00b3..cee65513d00 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2019-12-09  Prathamesh Kulkarni
+
+	PR tree-optimization/89007
+	* tree-vect-patterns.c (vect_recog_average_pattern): If there is no
+	target support available, generate code to distribute rshift over plus
+	and add a carry.
+
 2019-12-09  Martin Liska
 
 	PR ipa/92737
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c6094cc3913..0706b05b417 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2019-12-09  Prathamesh Kulkarni
+
+	PR tree-optimization/89007
+	* gcc.target/aarch64/sve/pr89007-1.c: New test.
+	* gcc.target/aarch64/sve/pr89007-2.c: Likewise.
+
 2019-12-09  Hongtao Liu
 
 	* gcc.target/i386/pr92686.inc: New file.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c
new file mode 100644
index 00000000000..af4aff4ec6d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c
@@ -0,0 +1,29 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 1024
+unsigned char dst[N];
+unsigned char in1[N];
+unsigned char in2[N];
+
+/*
+** foo:
+**	...
+**	lsr	(z[0-9]+\.b), z[0-9]+\.b, #1
+**	lsr	(z[0-9]+\.b), z[0-9]+\.b, #1
+**	add	(z[0-9]+\.b), (\1, \2|\2, \1)
+**	orr	(z[0-9]+)\.d, z[0-9]+\.d, z[0-9]+\.d
+**	and	(z[0-9]+\.b), \5\.b, #0x1
+**	add	z0\.b, (\3, \6|\6, \3)
+**	...
+*/
+void
+foo ()
+{
+  for( int x = 0; x < N; x++ )
+    dst[x] = (in1[x] + in2[x] + 1) >> 1;
+}
+
+/* { dg-final { scan-assembler-not {\tuunpklo\t} } } */
+/* { dg-final { scan-assembler-not {\tuunpkhi\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c
new file mode 100644
index 00000000000..2ccdd0d353e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c
@@ -0,0 +1,29 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define N 1024
+unsigned char dst[N];
+unsigned char in1[N];
+unsigned char in2[N];
+
+/*
+** foo:
+**	...
+**	lsr	(z[0-9]+\.b), z[0-9]+\.b, #1
+**	lsr	(z[0-9]+\.b), z[0-9]+\.b, #1
+**	add	(z[0-9]+\.b), (\1, \2|\2, \1)
+**	and	(z[0-9]+)\.d, z[0-9]+\.d, z[0-9]+\.d
+**	and	(z[0-9]+\.b), \5\.b, #0x1
+**	add	z0\.b, (\3, \6|\6, \3)
+**	...
+*/
+void
+foo ()
+{
+  for( int x = 0; x < N; x++ )
+    dst[x] = (in1[x] + in2[x]) >> 1;
+}
+
+/* { dg-final { scan-assembler-not {\tuunpklo\t} } } */
+/* { dg-final { scan-assembler-not {\tuunpkhi\t} } } */
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index 2e021287b4e..c9ad9e0eb94 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -1928,7 +1928,10 @@ vect_recog_mulhs_pattern (stmt_vec_info last_stmt_info, tree *type_out)
      TYPE avg = (TYPE) avg';
 
    where NTYPE is no wider than half of TYPE.  Since only the bottom half
-   of avg is used, all or part of the cast of avg' should become redundant.  */
+   of avg is used, all or part of the cast of avg' should become redundant.
+
+   If there is no target support available, generate code to distribute rshift
+   over plus and add a carry.  */
 
 static gimple *
 vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
@@ -2032,9 +2035,20 @@ vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
 
   /* Check for target support.  */
   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
-  if (!new_vectype
-      || !direct_internal_fn_supported_p (ifn, new_vectype,
-					  OPTIMIZE_FOR_SPEED))
+  if (!new_vectype)
+    return NULL;
+
+  bool fallback_p = false;
+
+  if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
+    ;
+  else if (TYPE_UNSIGNED (new_type)
+	   && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
+	   && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
+	   && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
+	   && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
+    fallback_p = true;
+  else
     return NULL;
 
   /* The IR requires a valid vector type for the cast result, even though
@@ -2043,11 +2057,53 @@ vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
   if (!*type_out)
     return NULL;
 
-  /* Generate the IFN_AVG* call.  */
   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
   tree new_ops[2];
   vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
 		       unprom, new_vectype);
+
+  if (fallback_p)
+    {
+      /* As a fallback, generate code for the following sequence:
+
+	 shifted_op0 = new_ops[0] >> 1;
+	 shifted_op1 = new_ops[1] >> 1;
+	 sum_of_shifted = shifted_op0 + shifted_op1;
+	 unmasked_carry = new_ops[0] and/or new_ops[1];
+	 carry = unmasked_carry & 1;
+	 new_var = sum_of_shifted + carry;
+      */
+
+      tree one_cst = build_one_cst (new_type);
+      gassign *g;
+
+      tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
+      g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
+      append_pattern_def_seq (last_stmt_info, g, new_vectype);
+
+      tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
+      g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
+      append_pattern_def_seq (last_stmt_info, g, new_vectype);
+
+      tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
+      g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
+			       shifted_op0, shifted_op1);
+      append_pattern_def_seq (last_stmt_info, g, new_vectype);
+
+      tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
+      tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
+      g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
+      append_pattern_def_seq (last_stmt_info, g, new_vectype);
+
+      tree carry = vect_recog_temp_ssa_var (new_type, NULL);
+      g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
+      append_pattern_def_seq (last_stmt_info, g, new_vectype);
+
+      g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
+      return vect_convert_output (last_stmt_info, type, g, new_vectype);
+    }
+
+  /* Generate the IFN_AVG* call.  */
   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
 						    new_ops[1]);
   gimple_call_set_lhs (average_stmt, new_var);
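
For reference, the identity behind the fallback can be checked in isolation.
The standalone program below is not part of the patch; the helper names
avg_floor and avg_ceil are illustrative only, mirroring the semantics of
IFN_AVG_FLOOR and IFN_AVG_CEIL.  It verifies exhaustively, for every pair of
unsigned char operands, that distributing the right shift over the plus and
adding back the masked carry ((a & b) & 1 for the floor case, (a | b) & 1 for
the ceiling case) matches the result of the widened computation:

#include <assert.h>

/* Floor average: (a + b) >> 1 without widening.  AND of the low bits
   recovers the bit that would have carried into the shifted result.  */
static unsigned char
avg_floor (unsigned char a, unsigned char b)
{
  return (a >> 1) + (b >> 1) + ((a & b) & 1);
}

/* Ceiling average: (a + b + 1) >> 1 without widening.  IOR recovers
   the carry instead, which also absorbs the rounding +1.  */
static unsigned char
avg_ceil (unsigned char a, unsigned char b)
{
  return (a >> 1) + (b >> 1) + ((a | b) & 1);
}

int
main (void)
{
  for (int a = 0; a < 256; a++)
    for (int b = 0; b < 256; b++)
      {
	/* The reference computation promotes to int, so the addition
	   cannot overflow the narrow type.  */
	assert (avg_floor (a, b) == ((a + b) >> 1));
	assert (avg_ceil (a, b) == ((a + b + 1) >> 1));
      }
  return 0;
}

This is the same shape the tests above expect: two lsr instructions, an add
of the shifted halves, an orr (ceiling) or and (floor) forming the unmasked
carry, an and with #0x1, and a final add, with no uunpklo/uunpkhi widening.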