From 4352288a3df915575a2b820f702242908740106f Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 24 Oct 2019 15:01:45 +0000 Subject: [PATCH] tree-vect-slp.c (vect_get_and_check_slp_defs): For reduction chains try harder with operand swapping and instead of putting a... 2019-10-24 Richard Biener * tree-vect-slp.c (vect_get_and_check_slp_defs): For reduction chains try harder with operand swapping and instead of putting a shifted chain into the reduction operands put a repetition of the final reduction op there as if we'd reassociate the expression. * gcc.dg/vect/slp-reduc-10a.c: New testcase. * gcc.dg/vect/slp-reduc-10b.c: Likewise. * gcc.dg/vect/slp-reduc-10c.c: Likewise. * gcc.dg/vect/slp-reduc-10d.c: Likewise. * gcc.dg/vect/slp-reduc-10e.c: Likewise. From-SVN: r277406 --- gcc/ChangeLog | 8 +++ gcc/testsuite/ChangeLog | 8 +++ gcc/testsuite/gcc.dg/vect/slp-reduc-10a.c | 82 +++++++++++++++++++++++ gcc/testsuite/gcc.dg/vect/slp-reduc-10b.c | 82 +++++++++++++++++++++++ gcc/testsuite/gcc.dg/vect/slp-reduc-10c.c | 82 +++++++++++++++++++++++ gcc/testsuite/gcc.dg/vect/slp-reduc-10d.c | 82 +++++++++++++++++++++++ gcc/testsuite/gcc.dg/vect/slp-reduc-10e.c | 82 +++++++++++++++++++++++ gcc/tree-vect-slp.c | 51 ++++++++++---- 8 files changed, 464 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/slp-reduc-10a.c create mode 100644 gcc/testsuite/gcc.dg/vect/slp-reduc-10b.c create mode 100644 gcc/testsuite/gcc.dg/vect/slp-reduc-10c.c create mode 100644 gcc/testsuite/gcc.dg/vect/slp-reduc-10d.c create mode 100644 gcc/testsuite/gcc.dg/vect/slp-reduc-10e.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ae2f20ea1df..f63370bd329 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2019-10-24 Richard Biener + + * tree-vect-slp.c (vect_get_and_check_slp_defs): For reduction + chains try harder with operand swapping and instead of + putting a shifted chain into the reduction operands put + a repetition of the final reduction op there as if we'd + reassociate the expression. + 2019-10-24 Jan Hubicka * ipa-reference.c (ipa_reference_optimization_summary_d): Rename diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ee43703ea54..329f244474f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2019-10-24 Richard Biener + + * gcc.dg/vect/slp-reduc-10a.c: New testcase. + * gcc.dg/vect/slp-reduc-10b.c: Likewise. + * gcc.dg/vect/slp-reduc-10c.c: Likewise. + * gcc.dg/vect/slp-reduc-10d.c: Likewise. + * gcc.dg/vect/slp-reduc-10e.c: Likewise. + 2019-10-24 Jozef Lawrynowicz * gcc.target/msp430/emulate-slli.c: Skip for -mcpu=msp430. diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-10a.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-10a.c new file mode 100644 index 00000000000..d3c2c2d7f54 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-10a.c @@ -0,0 +1,82 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-fgimple" } */ + +int __GIMPLE (ssa,guessed_local(118111600),startwith("dce3")) +foo (int * x, int n) +{ + int i; + int sum; + int _1; + long unsigned int _2; + long unsigned int _3; + int * _4; + int _5; + __SIZETYPE__ _7; + __SIZETYPE__ _8; + int * _9; + int _10; + __SIZETYPE__ _11; + __SIZETYPE__ _12; + int * _13; + int _14; + __SIZETYPE__ _15; + __SIZETYPE__ _16; + int * _17; + int _18; + + __BB(2,guessed_local(118111600)): + if (n_21(D) > 0) + goto __BB5(guessed(119453778)); + else + goto __BB7(guessed(14763950)); + + __BB(5,guessed_local(105119324)): + goto __BB3(precise(134217728)); + + __BB(3,loop_header(1),guessed_local(955630224)): + sum_30 = __PHI (__BB5: 0, __BB6: sum_27); + i_32 = __PHI (__BB5: 0, __BB6: i_28); + _1 = i_32 * 4; + _2 = (long unsigned int) _1; + _3 = _2 * 4ul; + _4 = x_23(D) + _3; + _5 = __MEM (_4); + sum_24 = _5 + sum_30; + _7 = _2 + 1ul; + _8 = _7 * 4ul; + _9 = x_23(D) + _8; + _10 = __MEM (_9); + sum_25 = _10 + sum_24; + _11 = _2 + 2ul; + _12 = _11 * 4ul; + _13 = x_23(D) + _12; + _14 = __MEM (_13); + sum_26 = _14 + sum_25; + _15 = _2 + 3ul; + _16 = _15 * 4ul; + _17 = x_23(D) + _16; + _18 = __MEM (_17); + sum_27 = _18 + sum_26; + i_28 = i_32 + 1; + if (n_21(D) > i_28) + goto __BB6(guessed(119453778)); + else + goto __BB8(guessed(14763950)); + + __BB(8,guessed_local(105119324)): + goto __BB4(precise(134217728)); + + __BB(6,guessed_local(850510900)): + goto __BB3(precise(134217728)); + + __BB(7,guessed_local(12992276)): + goto __BB4(precise(134217728)); + + __BB(4,guessed_local(118111601)): + sum_31 = __PHI (__BB7: 0, __BB8: sum_27); + return sum_31; + +} + +/* { dg-final { scan-tree-dump "Decided to SLP 1 instances" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-10b.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-10b.c new file mode 100644 index 00000000000..6a0d55def30 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-10b.c @@ -0,0 +1,82 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-fgimple" } */ + +int __GIMPLE (ssa,guessed_local(118111600),startwith("dce3")) +foo (int * x, int n) +{ + int i; + int sum; + int _1; + long unsigned int _2; + long unsigned int _3; + int * _4; + int _5; + __SIZETYPE__ _7; + __SIZETYPE__ _8; + int * _9; + int _100; + __SIZETYPE__ _11; + __SIZETYPE__ _12; + int * _13; + int _14; + __SIZETYPE__ _15; + __SIZETYPE__ _16; + int * _17; + int _18; + + __BB(2,guessed_local(118111600)): + if (n_21(D) > 0) + goto __BB5(guessed(119453778)); + else + goto __BB7(guessed(14763950)); + + __BB(5,guessed_local(105119324)): + goto __BB3(precise(134217728)); + + __BB(3,loop_header(1),guessed_local(955630224)): + sum_30 = __PHI (__BB5: 0, __BB6: sum_27); + i_32 = __PHI (__BB5: 0, __BB6: i_28); + _1 = i_32 * 4; + _2 = (long unsigned int) _1; + _3 = _2 * 4ul; + _4 = x_23(D) + _3; + _5 = __MEM (_4); + sum_24 = _5 + sum_30; + _7 = _2 + 1ul; + _8 = _7 * 4ul; + _9 = x_23(D) + _8; + _100 = __MEM (_9); + sum_25 = sum_24 + _100; + _11 = _2 + 2ul; + _12 = _11 * 4ul; + _13 = x_23(D) + _12; + _14 = __MEM (_13); + sum_26 = _14 + sum_25; + _15 = _2 + 3ul; + _16 = _15 * 4ul; + _17 = x_23(D) + _16; + _18 = __MEM (_17); + sum_27 = _18 + sum_26; + i_28 = i_32 + 1; + if (n_21(D) > i_28) + goto __BB6(guessed(119453778)); + else + goto __BB8(guessed(14763950)); + + __BB(8,guessed_local(105119324)): + goto __BB4(precise(134217728)); + + __BB(6,guessed_local(850510900)): + goto __BB3(precise(134217728)); + + __BB(7,guessed_local(12992276)): + goto __BB4(precise(134217728)); + + __BB(4,guessed_local(118111601)): + sum_31 = __PHI (__BB7: 0, __BB8: sum_27); + return sum_31; + +} + +/* { dg-final { scan-tree-dump "Decided to SLP 1 instances" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-10c.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-10c.c new file mode 100644 index 00000000000..20df2626764 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-10c.c @@ -0,0 +1,82 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-fgimple" } */ + +int __GIMPLE (ssa,guessed_local(118111600),startwith("dce3")) +foo (int * x, int n) +{ + int i; + int sum; + int _1; + long unsigned int _2; + long unsigned int _3; + int * _4; + int _5; + __SIZETYPE__ _7; + __SIZETYPE__ _8; + int * _9; + int _10; + __SIZETYPE__ _11; + __SIZETYPE__ _12; + int * _13; + int _100; + __SIZETYPE__ _15; + __SIZETYPE__ _16; + int * _17; + int _18; + + __BB(2,guessed_local(118111600)): + if (n_21(D) > 0) + goto __BB5(guessed(119453778)); + else + goto __BB7(guessed(14763950)); + + __BB(5,guessed_local(105119324)): + goto __BB3(precise(134217728)); + + __BB(3,loop_header(1),guessed_local(955630224)): + sum_30 = __PHI (__BB5: 0, __BB6: sum_27); + i_32 = __PHI (__BB5: 0, __BB6: i_28); + _1 = i_32 * 4; + _2 = (long unsigned int) _1; + _3 = _2 * 4ul; + _4 = x_23(D) + _3; + _5 = __MEM (_4); + sum_24 = _5 + sum_30; + _7 = _2 + 1ul; + _8 = _7 * 4ul; + _9 = x_23(D) + _8; + _10 = __MEM (_9); + sum_25 = _10 + sum_24; + _11 = _2 + 2ul; + _12 = _11 * 4ul; + _13 = x_23(D) + _12; + _100 = __MEM (_13); + sum_26 = sum_25 + _100; + _15 = _2 + 3ul; + _16 = _15 * 4ul; + _17 = x_23(D) + _16; + _18 = __MEM (_17); + sum_27 = _18 + sum_26; + i_28 = i_32 + 1; + if (n_21(D) > i_28) + goto __BB6(guessed(119453778)); + else + goto __BB8(guessed(14763950)); + + __BB(8,guessed_local(105119324)): + goto __BB4(precise(134217728)); + + __BB(6,guessed_local(850510900)): + goto __BB3(precise(134217728)); + + __BB(7,guessed_local(12992276)): + goto __BB4(precise(134217728)); + + __BB(4,guessed_local(118111601)): + sum_31 = __PHI (__BB7: 0, __BB8: sum_27); + return sum_31; + +} + +/* { dg-final { scan-tree-dump "Decided to SLP 1 instances" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-10d.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-10d.c new file mode 100644 index 00000000000..8a512d5c14d --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-10d.c @@ -0,0 +1,82 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-fgimple" } */ + +int __GIMPLE (ssa,guessed_local(118111600),startwith("dce3")) +foo (int * x, int n) +{ + int i; + int sum; + int _1; + long unsigned int _2; + long unsigned int _3; + int * _4; + int _5; + __SIZETYPE__ _7; + __SIZETYPE__ _8; + int * _9; + int _10; + __SIZETYPE__ _11; + __SIZETYPE__ _12; + int * _13; + int _14; + __SIZETYPE__ _15; + __SIZETYPE__ _16; + int * _17; + int _100; + + __BB(2,guessed_local(118111600)): + if (n_21(D) > 0) + goto __BB5(guessed(119453778)); + else + goto __BB7(guessed(14763950)); + + __BB(5,guessed_local(105119324)): + goto __BB3(precise(134217728)); + + __BB(3,loop_header(1),guessed_local(955630224)): + sum_30 = __PHI (__BB5: 0, __BB6: sum_27); + i_32 = __PHI (__BB5: 0, __BB6: i_28); + _1 = i_32 * 4; + _2 = (long unsigned int) _1; + _3 = _2 * 4ul; + _4 = x_23(D) + _3; + _5 = __MEM (_4); + sum_24 = _5 + sum_30; + _7 = _2 + 1ul; + _8 = _7 * 4ul; + _9 = x_23(D) + _8; + _10 = __MEM (_9); + sum_25 = _10 + sum_24; + _11 = _2 + 2ul; + _12 = _11 * 4ul; + _13 = x_23(D) + _12; + _14 = __MEM (_13); + sum_26 = _14 + sum_25; + _15 = _2 + 3ul; + _16 = _15 * 4ul; + _17 = x_23(D) + _16; + _100 = __MEM (_17); + sum_27 = sum_26 + _100; + i_28 = i_32 + 1; + if (n_21(D) > i_28) + goto __BB6(guessed(119453778)); + else + goto __BB8(guessed(14763950)); + + __BB(8,guessed_local(105119324)): + goto __BB4(precise(134217728)); + + __BB(6,guessed_local(850510900)): + goto __BB3(precise(134217728)); + + __BB(7,guessed_local(12992276)): + goto __BB4(precise(134217728)); + + __BB(4,guessed_local(118111601)): + sum_31 = __PHI (__BB7: 0, __BB8: sum_27); + return sum_31; + +} + +/* { dg-final { scan-tree-dump "Decided to SLP 1 instances" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-10e.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-10e.c new file mode 100644 index 00000000000..268ec9db77d --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-10e.c @@ -0,0 +1,82 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-fgimple" } */ + +int __GIMPLE (ssa,guessed_local(118111600),startwith("dce3")) +foo (int * x, int n) +{ + int i; + int sum; + int _1; + long unsigned int _2; + long unsigned int _3; + int * _4; + int _100; + __SIZETYPE__ _7; + __SIZETYPE__ _8; + int * _9; + int _10; + __SIZETYPE__ _11; + __SIZETYPE__ _12; + int * _13; + int _14; + __SIZETYPE__ _15; + __SIZETYPE__ _16; + int * _17; + int _18; + + __BB(2,guessed_local(118111600)): + if (n_21(D) > 0) + goto __BB5(guessed(119453778)); + else + goto __BB7(guessed(14763950)); + + __BB(5,guessed_local(105119324)): + goto __BB3(precise(134217728)); + + __BB(3,loop_header(1),guessed_local(955630224)): + sum_30 = __PHI (__BB5: 0, __BB6: sum_27); + i_32 = __PHI (__BB5: 0, __BB6: i_28); + _1 = i_32 * 4; + _2 = (long unsigned int) _1; + _3 = _2 * 4ul; + _4 = x_23(D) + _3; + _100 = __MEM (_4); + sum_24 = sum_30 + _100; + _7 = _2 + 1ul; + _8 = _7 * 4ul; + _9 = x_23(D) + _8; + _10 = __MEM (_9); + sum_25 = _10 + sum_24; + _11 = _2 + 2ul; + _12 = _11 * 4ul; + _13 = x_23(D) + _12; + _14 = __MEM (_13); + sum_26 = _14 + sum_25; + _15 = _2 + 3ul; + _16 = _15 * 4ul; + _17 = x_23(D) + _16; + _18 = __MEM (_17); + sum_27 = _18 + sum_26; + i_28 = i_32 + 1; + if (n_21(D) > i_28) + goto __BB6(guessed(119453778)); + else + goto __BB8(guessed(14763950)); + + __BB(8,guessed_local(105119324)): + goto __BB4(precise(134217728)); + + __BB(6,guessed_local(850510900)): + goto __BB3(precise(134217728)); + + __BB(7,guessed_local(12992276)): + goto __BB4(precise(134217728)); + + __BB(4,guessed_local(118111601)): + sum_31 = __PHI (__BB7: 0, __BB8: sum_27); + return sum_31; + +} + +/* { dg-final { scan-tree-dump "Decided to SLP 1 instances" "vect" } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 0af51197a84..3db08fac400 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -433,20 +433,35 @@ again: the def-stmt/s of the first stmt. Allow different definition types for reduction chains: the first stmt must be a vect_reduction_def (a phi node), and the rest - vect_internal_def. */ + end in the reduction chain. */ tree type = TREE_TYPE (oprnd); if ((oprnd_info->first_dt != dt && !(oprnd_info->first_dt == vect_reduction_def - && dt == vect_internal_def) + && !STMT_VINFO_DATA_REF (stmt_info) + && REDUC_GROUP_FIRST_ELEMENT (stmt_info) + && def_stmt_info + && !STMT_VINFO_DATA_REF (def_stmt_info) + && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info) + == REDUC_GROUP_FIRST_ELEMENT (stmt_info))) && !((oprnd_info->first_dt == vect_external_def || oprnd_info->first_dt == vect_constant_def) && (dt == vect_external_def || dt == vect_constant_def))) - || !types_compatible_p (oprnd_info->first_op_type, type)) + || !types_compatible_p (oprnd_info->first_op_type, type) + || (!STMT_VINFO_DATA_REF (stmt_info) + && REDUC_GROUP_FIRST_ELEMENT (stmt_info) + && ((!def_stmt_info + || STMT_VINFO_DATA_REF (def_stmt_info) + || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info) + != REDUC_GROUP_FIRST_ELEMENT (stmt_info))) + != (oprnd_info->first_dt != vect_reduction_def)))) { /* Try swapping operands if we got a mismatch. */ if (i == commutative_op && !swapped) { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "trying swapped operands\n"); swapped = true; goto again; } @@ -484,9 +499,26 @@ again: oprnd_info->ops.quick_push (oprnd); break; + case vect_internal_def: case vect_reduction_def: + if (oprnd_info->first_dt == vect_reduction_def + && !STMT_VINFO_DATA_REF (stmt_info) + && REDUC_GROUP_FIRST_ELEMENT (stmt_info) + && !STMT_VINFO_DATA_REF (def_stmt_info) + && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info) + == REDUC_GROUP_FIRST_ELEMENT (stmt_info))) + { + /* For a SLP reduction chain we want to duplicate the + reduction to each of the chain members. That gets + us a sane SLP graph (still the stmts are not 100% + correct wrt the initial values). */ + gcc_assert (!first); + oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]); + oprnd_info->ops.quick_push (oprnd_info->ops[0]); + break; + } + /* Fallthru. */ case vect_induction_def: - case vect_internal_def: oprnd_info->def_stmts.quick_push (def_stmt_info); oprnd_info->ops.quick_push (oprnd); break; @@ -1182,15 +1214,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, /* Else def types have to match. */ stmt_vec_info other_info; FOR_EACH_VEC_ELT (stmts, i, other_info) - { - /* But for reduction chains only check on the first stmt. */ - if (!STMT_VINFO_DATA_REF (other_info) - && REDUC_GROUP_FIRST_ELEMENT (other_info) - && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info) - continue; - if (STMT_VINFO_DEF_TYPE (other_info) != def_type) - return NULL; - } + if (STMT_VINFO_DEF_TYPE (other_info) != def_type) + return NULL; } else return NULL; -- 2.30.2