From 056775650ac086fb069e00415bf262e22f7579c5 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 4 Dec 2018 08:23:40 +0000 Subject: [PATCH] re PR tree-optimization/88315 (SAD and DOT_PROD SLP reductions with initial value != 0 create wrong code) 2018-12-04 Richard Biener PR tree-optimization/88315 * tree-vect-loop.c (get_initial_defs_for_reduction): Simplify and fix initialization vector for SAD and DOT_PROD SLP reductions. * gcc.dg/vect/slp-reduc-sad.c: Adjust to provide non-trivial initial value. From-SVN: r266771 --- gcc/ChangeLog | 6 + gcc/testsuite/ChangeLog | 6 + gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c | 4 +- gcc/tree-vect-loop.c | 167 ++++++++-------------- 4 files changed, 77 insertions(+), 106 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 19d130bb450..335caf63854 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2018-12-04 Richard Biener + + PR tree-optimization/88315 + * tree-vect-loop.c (get_initial_defs_for_reduction): Simplify + and fix initialization vector for SAD and DOT_PROD SLP reductions. + 2018-12-03 Sandra Loosemore PR c/59039 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f6bd4566c9e..edd24f2e834 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2018-12-04 Richard Biener + + PR tree-optimization/88315 + * gcc.dg/vect/slp-reduc-sad.c: Adjust to provide non-trivial + initial value. + 2018-12-03 Jakub Jelinek PR middle-end/64242 diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c index d921c7cd65d..5f7a3e09f60 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-sad.c @@ -12,7 +12,7 @@ extern void abort (void); int __attribute__((noinline,noclone)) foo (uint8_t *pix1, uint8_t *pix2, int i_stride_pix2) { - int i_sum = 0; + int i_sum = 5; for( int y = 0; y < 16; y++ ) { i_sum += abs ( pix1[0] - pix2[0] ); @@ -52,7 +52,7 @@ main () __asm__ volatile (""); } - if (foo (X, Y, 16) != 32512) + if (foo (X, Y, 16) != 32512 + 5) abort (); return 0; diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 633c3154169..fa926f4ebb5 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -4100,12 +4100,8 @@ get_initial_defs_for_reduction (slp_tree slp_node, unsigned HOST_WIDE_INT nunits; unsigned j, number_of_places_left_in_vector; tree vector_type; - tree vop; - int group_size = stmts.length (); - unsigned int vec_num, i; - unsigned number_of_copies = 1; - vec voprnds; - voprnds.create (number_of_vectors); + unsigned int group_size = stmts.length (); + unsigned int i; struct loop *loop; auto_vec permute_results; @@ -4138,115 +4134,78 @@ get_initial_defs_for_reduction (slp_tree slp_node, if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits)) nunits = group_size; - number_of_copies = nunits * number_of_vectors / group_size; - number_of_places_left_in_vector = nunits; bool constant_p = true; tree_vector_builder elts (vector_type, nunits, 1); elts.quick_grow (nunits); - for (j = 0; j < number_of_copies; j++) + for (j = 0; j < nunits * number_of_vectors; ++j) { - for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--) - { - tree op; - /* Get the def before the loop. In reduction chain we have only - one initial value. */ - if ((j != (number_of_copies - 1) - || (reduc_chain && i != 0)) - && neutral_op) - op = neutral_op; - else - op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe); + tree op; + i = j % group_size; + stmt_vinfo = stmts[i]; - /* Create 'vect_ = {op0,op1,...,opn}'. */ - number_of_places_left_in_vector--; - elts[number_of_places_left_in_vector] = op; - if (!CONSTANT_CLASS_P (op)) - constant_p = false; + /* Get the def before the loop. In reduction chain we have only + one initial value. Else we have as many as PHIs in the group. */ + if (reduc_chain) + op = j != 0 ? neutral_op : PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe); + else if (((vec_oprnds->length () + 1) * nunits + - number_of_places_left_in_vector >= group_size) + && neutral_op) + op = neutral_op; + else + op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe); - if (number_of_places_left_in_vector == 0) - { - gimple_seq ctor_seq = NULL; - tree init; - if (constant_p && !neutral_op - ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits) - : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits)) - /* Build the vector directly from ELTS. */ - init = gimple_build_vector (&ctor_seq, &elts); - else if (neutral_op) - { - /* Build a vector of the neutral value and shift the - other elements into place. */ - init = gimple_build_vector_from_val (&ctor_seq, vector_type, - neutral_op); - int k = nunits; - while (k > 0 && elts[k - 1] == neutral_op) - k -= 1; - while (k > 0) - { - k -= 1; - init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT, - vector_type, init, elts[k]); - } - } - else + /* Create 'vect_ = {op0,op1,...,opn}'. */ + number_of_places_left_in_vector--; + elts[nunits - number_of_places_left_in_vector - 1] = op; + if (!CONSTANT_CLASS_P (op)) + constant_p = false; + + if (number_of_places_left_in_vector == 0) + { + gimple_seq ctor_seq = NULL; + tree init; + if (constant_p && !neutral_op + ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits) + : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits)) + /* Build the vector directly from ELTS. */ + init = gimple_build_vector (&ctor_seq, &elts); + else if (neutral_op) + { + /* Build a vector of the neutral value and shift the + other elements into place. */ + init = gimple_build_vector_from_val (&ctor_seq, vector_type, + neutral_op); + int k = nunits; + while (k > 0 && elts[k - 1] == neutral_op) + k -= 1; + while (k > 0) { - /* First time round, duplicate ELTS to fill the - required number of vectors, then cherry pick the - appropriate result for each iteration. */ - if (vec_oprnds->is_empty ()) - duplicate_and_interleave (&ctor_seq, vector_type, elts, - number_of_vectors, - permute_results); - init = permute_results[number_of_vectors - j - 1]; + k -= 1; + init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT, + vector_type, init, elts[k]); } - if (ctor_seq != NULL) - gsi_insert_seq_on_edge_immediate (pe, ctor_seq); - voprnds.quick_push (init); - - number_of_places_left_in_vector = nunits; - elts.new_vector (vector_type, nunits, 1); - elts.quick_grow (nunits); - constant_p = true; - } - } - } - - /* Since the vectors are created in the reverse order, we should invert - them. */ - vec_num = voprnds.length (); - for (j = vec_num; j != 0; j--) - { - vop = voprnds[j - 1]; - vec_oprnds->quick_push (vop); - } - - voprnds.release (); - - /* In case that VF is greater than the unrolling factor needed for the SLP - group of stmts, NUMBER_OF_VECTORS to be created is greater than - NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have - to replicate the vectors. */ - tree neutral_vec = NULL; - while (number_of_vectors > vec_oprnds->length ()) - { - if (neutral_op) - { - if (!neutral_vec) + } + else { - gimple_seq ctor_seq = NULL; - neutral_vec = gimple_build_vector_from_val - (&ctor_seq, vector_type, neutral_op); - if (ctor_seq != NULL) - gsi_insert_seq_on_edge_immediate (pe, ctor_seq); + /* First time round, duplicate ELTS to fill the + required number of vectors, then cherry pick the + appropriate result for each iteration. */ + if (vec_oprnds->is_empty ()) + duplicate_and_interleave (&ctor_seq, vector_type, elts, + number_of_vectors, + permute_results); + init = permute_results[number_of_vectors - j - 1]; } - vec_oprnds->quick_push (neutral_vec); - } - else - { - for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++) - vec_oprnds->quick_push (vop); - } + if (ctor_seq != NULL) + gsi_insert_seq_on_edge_immediate (pe, ctor_seq); + vec_oprnds->quick_push (init); + + number_of_places_left_in_vector = nunits; + elts.new_vector (vector_type, nunits, 1); + elts.quick_grow (nunits); + constant_p = true; + } } } -- 2.30.2