From: Richard Biener Date: Wed, 8 Mar 2017 08:50:01 +0000 (+0000) Subject: re PR tree-optimization/79920 (Incorrect floating point results when compiling with... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=61fdfd8c51a99f07b59706037cb2946bc793480c;p=gcc.git re PR tree-optimization/79920 (Incorrect floating point results when compiling with -O3) 2017-03-08 Richard Biener PR tree-optimization/79920 * tree-vect-slp.c (vect_create_mask_and_perm): Remove and inline with ncopies == 1 to ... (vect_transform_slp_perm_load): ... here. Properly compute all element loads by iterating VF times over the group. Do not handle ncopies (computed in a broken way) in vect_create_mask_and_perm. * gcc.dg/vect/pr79920.c: New testcase. From-SVN: r245968 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index fcca6aa4cc3..05f6017c973 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2017-03-08 Richard Biener + + PR tree-optimization/79920 + * tree-vect-slp.c (vect_create_mask_and_perm): Remove and inline + with ncopies == 1 to ... + (vect_transform_slp_perm_load): ... here. Properly compute + all element loads by iterating VF times over the group. Do + not handle ncopies (computed in a broken way) in + vect_create_mask_and_perm. + 2017-03-08 Jakub Jelinek PR sanitizer/79904 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f2fd40e876d..eef88268eba 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2017-03-08 Richard Biener + + PR tree-optimization/79920 + * gcc.dg/vect/pr79920.c: New testcase. + 2017-03-08 Jakub Jelinek PR sanitizer/79904 diff --git a/gcc/testsuite/gcc.dg/vect/pr79920.c b/gcc/testsuite/gcc.dg/vect/pr79920.c new file mode 100644 index 00000000000..c066b91e73f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr79920.c @@ -0,0 +1,44 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O3" } */ + +#include "tree-vect.h" + +double __attribute__((noinline,noclone)) +compute_integral (double w_1[18]) +{ + double A = 0; + double t33[2][6] = {{0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, + {0.0, 0.0, 0.0, 0.0, 0.0, 0.0}}; + double t43[2] = {0.0, 0.0}; + double t31[2][2] = {{1.0, 1.0}, {1.0, 1.0}}; + double t32[2][3] = {{0.0, 0.0, 1.0}, {0.0, 0.0, 1.0}}; + + for (int ip_1 = 0; ip_1 < 2; ++ip_1) + { + for (int i_0 = 0; i_0 < 6; ++i_0) + t33[ip_1][i_0] = ((w_1[i_0*3] * t32[ip_1][0]) + + (w_1[i_0*3+2] * t32[ip_1][2])); + t43[ip_1] = 2.0; + } + for (int i_0 = 0; i_0 < 6; ++i_0) + A += t43[1]*t33[1][i_0]; + return A; +} + +int main() +{ + check_vect (); + + double w_1[18] = {0., 1.0, 1.0, + 0., 1.0, 1.0, + 0., 1.0, 1.0, + 0., 1.0, 1.0, + 0., 1.0, 1.0, + 0., 1.0, 1.0}; + double A = compute_integral(w_1); + if (A != 12.0) + __builtin_abort (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_double && { vect_perm && vect_hw_misalign } } } } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 46d1ad6eff2..1300c6a173d 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -3379,66 +3379,6 @@ vect_get_slp_defs (vec ops, slp_tree slp_node, } } - -/* Create NCOPIES permutation statements using the mask MASK_BYTES (by - building a vector of type MASK_TYPE from it) and two input vectors placed in - DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and - shifting by STRIDE elements of DR_CHAIN for every copy. - (STRIDE is the number of vectorized stmts for NODE divided by the number of - copies). - VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where - the created stmts must be inserted. */ - -static inline void -vect_create_mask_and_perm (gimple *stmt, - tree mask, int first_vec_indx, int second_vec_indx, - gimple_stmt_iterator *gsi, slp_tree node, - tree vectype, vec dr_chain, - int ncopies, int vect_stmts_counter) -{ - tree perm_dest; - gimple *perm_stmt = NULL; - int i, stride_in, stride_out; - tree first_vec, second_vec, data_ref; - - stride_out = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies; - stride_in = dr_chain.length () / ncopies; - - /* Initialize the vect stmts of NODE to properly insert the generated - stmts later. */ - for (i = SLP_TREE_VEC_STMTS (node).length (); - i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++) - SLP_TREE_VEC_STMTS (node).quick_push (NULL); - - perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype); - for (i = 0; i < ncopies; i++) - { - first_vec = dr_chain[first_vec_indx]; - second_vec = dr_chain[second_vec_indx]; - - /* Generate the permute statement if necessary. */ - if (mask) - { - perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR, - first_vec, second_vec, mask); - data_ref = make_ssa_name (perm_dest, perm_stmt); - gimple_set_lhs (perm_stmt, data_ref); - vect_finish_stmt_generation (stmt, perm_stmt, gsi); - } - else - /* If mask was NULL_TREE generate the requested identity transform. */ - perm_stmt = SSA_NAME_DEF_STMT (first_vec); - - /* Store the vector statement in NODE. */ - SLP_TREE_VEC_STMTS (node)[stride_out * i + vect_stmts_counter] - = perm_stmt; - - first_vec_indx += stride_in; - second_vec_indx += stride_in; - } -} - - /* Generate vector permute statements from a list of loads in DR_CHAIN. If ANALYZE_ONLY is TRUE, only check that it is possible to create valid permute statements for the SLP node NODE of the SLP instance @@ -3456,7 +3396,7 @@ vect_transform_slp_perm_load (slp_tree node, vec dr_chain, int nunits, vec_index = 0; tree vectype = STMT_VINFO_VECTYPE (stmt_info); int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance); - int unroll_factor, mask_element, ncopies; + int mask_element; unsigned char *mask; machine_mode mode; @@ -3474,11 +3414,13 @@ vect_transform_slp_perm_load (slp_tree node, vec dr_chain, mask_type = get_vectype_for_scalar_type (mask_element_type); nunits = TYPE_VECTOR_SUBPARTS (vectype); mask = XALLOCAVEC (unsigned char, nunits); - unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance); - /* Number of copies is determined by the final vectorization factor - relatively to SLP_NODE_INSTANCE unrolling factor. */ - ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance); + /* Initialize the vect stmts of NODE to properly insert the generated + stmts later. */ + if (! analyze_only) + for (unsigned i = SLP_TREE_VEC_STMTS (node).length (); + i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++) + SLP_TREE_VEC_STMTS (node).quick_push (NULL); /* Generate permutation masks for every NODE. Number of masks for each NODE is equal to GROUP_SIZE. @@ -3505,7 +3447,7 @@ vect_transform_slp_perm_load (slp_tree node, vec dr_chain, bool noop_p = true; *n_perms = 0; - for (int j = 0; j < unroll_factor; j++) + for (int j = 0; j < vf; j++) { for (int k = 0; k < group_size; k++) { @@ -3578,10 +3520,30 @@ vect_transform_slp_perm_load (slp_tree node, vec dr_chain, if (second_vec_index == -1) second_vec_index = first_vec_index; - vect_create_mask_and_perm (stmt, mask_vec, first_vec_index, - second_vec_index, - gsi, node, vectype, dr_chain, - ncopies, vect_stmts_counter++); + + /* Generate the permute statement if necessary. */ + tree first_vec = dr_chain[first_vec_index]; + tree second_vec = dr_chain[second_vec_index]; + gimple *perm_stmt; + if (! noop_p) + { + tree perm_dest + = vect_create_destination_var (gimple_assign_lhs (stmt), + vectype); + perm_dest = make_ssa_name (perm_dest); + perm_stmt = gimple_build_assign (perm_dest, + VEC_PERM_EXPR, + first_vec, second_vec, + mask_vec); + vect_finish_stmt_generation (stmt, perm_stmt, gsi); + } + else + /* If mask was NULL_TREE generate the requested + identity transform. */ + perm_stmt = SSA_NAME_DEF_STMT (first_vec); + + /* Store the vector statement in NODE. */ + SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt; } index = 0;