From: Richard Biener Date: Thu, 22 Jun 2017 12:07:07 +0000 (+0000) Subject: tree-vect-loop.c (vect_model_reduction_cost): Handle COND_REDUCTION and INTEGER_INDUC... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0759db190d979bb4adaeaedd19ab0c823c168691;p=gcc.git tree-vect-loop.c (vect_model_reduction_cost): Handle COND_REDUCTION and INTEGER_INDUC_COND_REDUCTION without REDUC_MAX_EXPR... 2016-06-22 Richard Biener * tree-vect-loop.c (vect_model_reduction_cost): Handle COND_REDUCTION and INTEGER_INDUC_COND_REDUCTION without REDUC_MAX_EXPR support. (vectorizable_reduction): Likewise. (vect_create_epilog_for_reduction): Likewise. * gcc.dg/vect/pr65947-1.c: Remove xfail. * gcc.dg/vect/pr65947-2.c: Likewise. * gcc.dg/vect/pr65947-3.c: Likewise. * gcc.dg/vect/pr65947-4.c: Likewise. * gcc.dg/vect/pr65947-5.c: Likewise. * gcc.dg/vect/pr65947-6.c: Likewise. * gcc.dg/vect/pr65947-8.c: Likewise. * gcc.dg/vect/pr65947-9.c: Likewise. * gcc.dg/vect/pr65947-10.c: Likewise. * gcc.dg/vect/pr65947-12.c: Likewise. * gcc.dg/vect/pr65947-13.c: Likewise. * gcc.dg/vect/pr65947-14.c: Likewise. * gcc.dg/vect/vect-cond-2.c: Likewise. * gcc.dg/vect/vect-pr69848.c: Likewise. From-SVN: r249553 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9b150a48645..4b78674ee3f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2016-06-22 Richard Biener + + * tree-vect-loop.c (vect_model_reduction_cost): Handle + COND_REDUCTION and INTEGER_INDUC_COND_REDUCTION without + REDUC_MAX_EXPR support. + (vectorizable_reduction): Likewise. + (vect_create_epilog_for_reduction): Likewise. + 2017-06-22 James Greenhalgh * match.pd (A / (1 << B) -> A >> B): New. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index af774e433f7..641e4124e37 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,20 @@ +2016-06-22 Richard Biener + + * gcc.dg/vect/pr65947-1.c: Remove xfail. + * gcc.dg/vect/pr65947-2.c: Likewise. + * gcc.dg/vect/pr65947-3.c: Likewise. 
+ * gcc.dg/vect/pr65947-4.c: Likewise. + * gcc.dg/vect/pr65947-5.c: Likewise. + * gcc.dg/vect/pr65947-6.c: Likewise. + * gcc.dg/vect/pr65947-8.c: Likewise. + * gcc.dg/vect/pr65947-9.c: Likewise. + * gcc.dg/vect/pr65947-10.c: Likewise. + * gcc.dg/vect/pr65947-12.c: Likewise. + * gcc.dg/vect/pr65947-13.c: Likewise. + * gcc.dg/vect/pr65947-14.c: Likewise. + * gcc.dg/vect/vect-cond-2.c: Likewise. + * gcc.dg/vect/vect-pr69848.c: Likewise. + 2017-06-22 Martin Liska * gcc.dg/tree-ssa/ipa-split-5.c: Make function bigger in order diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-1.c b/gcc/testsuite/gcc.dg/vect/pr65947-1.c index 93ca4dbcc21..9072f11a104 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-1.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-1.c @@ -40,5 +40,5 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */ -/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { xfail { ! vect_max_reduc } } } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-10.c b/gcc/testsuite/gcc.dg/vect/pr65947-10.c index 9bdfd6db518..a8a674f40f0 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-10.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-10.c @@ -40,6 +40,6 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." 
"vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-12.c b/gcc/testsuite/gcc.dg/vect/pr65947-12.c index 2f37aaf01ec..8e2c46f1a6b 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-12.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-12.c @@ -41,5 +41,5 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-13.c b/gcc/testsuite/gcc.dg/vect/pr65947-13.c index e1b626e45a3..061777af34c 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-13.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-13.c @@ -41,5 +41,5 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-14.c b/gcc/testsuite/gcc.dg/vect/pr65947-14.c index 0d47a7da18d..a28e80bb9fc 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-14.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-14.c @@ -40,5 +40,5 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-2.c b/gcc/testsuite/gcc.dg/vect/pr65947-2.c index 6a36db131e4..d72fffa6720 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-2.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-2.c @@ -41,5 +41,5 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! 
vect_max_reduc } } } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-3.c b/gcc/testsuite/gcc.dg/vect/pr65947-3.c index 1323ed07c80..98945ba505d 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-3.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-3.c @@ -51,5 +51,5 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-4.c b/gcc/testsuite/gcc.dg/vect/pr65947-4.c index d754b8d3ccb..695889d743b 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-4.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-4.c @@ -40,6 +40,6 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */ -/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { xfail { ! vect_max_reduc } } } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-5.c b/gcc/testsuite/gcc.dg/vect/pr65947-5.c index b2af501f40d..04d9b38d58a 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-5.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-5.c @@ -41,6 +41,6 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { xfail { ! vect_max_reduc } } } } */ -/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail { ! 
vect_max_reduc } } } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-6.c b/gcc/testsuite/gcc.dg/vect/pr65947-6.c index 12dc852a32f..caa4a14120a 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-6.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-6.c @@ -40,5 +40,5 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-8.c b/gcc/testsuite/gcc.dg/vect/pr65947-8.c index 293118582aa..f0f1ac29699 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-8.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-8.c @@ -42,4 +42,4 @@ main (void) } /* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */ -/* { dg-final { scan-tree-dump "multiple types in double reduction or condition reduction" "vect" { xfail { ! vect_max_reduc } } } } */ +/* { dg-final { scan-tree-dump "multiple types in double reduction or condition reduction" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-9.c b/gcc/testsuite/gcc.dg/vect/pr65947-9.c index a43560b6870..d769af9ec73 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-9.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-9.c @@ -46,4 +46,4 @@ main () } /* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */ -/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail { ! 
vect_max_reduc } } } } */ +/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-2.c b/gcc/testsuite/gcc.dg/vect/vect-cond-2.c index 646eac12a38..094cfe76701 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-cond-2.c +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-2.c @@ -39,6 +39,4 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { ! vect_max_reduc } } } } */ - - +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr69848.c b/gcc/testsuite/gcc.dg/vect/vect-pr69848.c index 779a657b34e..c08f1e2f5c0 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-pr69848.c +++ b/gcc/testsuite/gcc.dg/vect/vect-pr69848.c @@ -34,4 +34,4 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail { ! vect_max_reduc } } } } */ +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index d601296ab3e..a7c3d3d7e29 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -3772,6 +3772,18 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code, vect_epilogue); } } + else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION) + { + unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype); + /* Extraction of scalar elements. */ + epilogue_cost += add_stmt_cost (target_cost_data, 2 * nunits, + vec_to_scalar, stmt_info, 0, + vect_epilogue); + /* Scalar max reductions via COND_EXPR / MAX_EXPR. 
*/ + epilogue_cost += add_stmt_cost (target_cost_data, 2 * nunits - 3, + scalar_stmt, stmt_info, 0, + vect_epilogue); + } else { int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)); @@ -3780,10 +3792,14 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code, int element_bitsize = tree_to_uhwi (bitsize); int nelements = vec_size_in_bits / element_bitsize; + if (code == COND_EXPR) + code = MAX_EXPR; + optab = optab_for_tree_code (code, vectype, optab_default); /* We have a whole vector shift available. */ - if (VECTOR_MODE_P (mode) + if (optab != unknown_optab + && VECTOR_MODE_P (mode) && optab_handler (optab, mode) != CODE_FOR_nothing && have_whole_vector_shift (mode)) { @@ -4424,7 +4440,8 @@ vect_create_epilog_for_reduction (vec vect_defs, gimple *stmt, else new_phi_result = PHI_RESULT (new_phis[0]); - if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION) + if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION + && reduc_code != ERROR_MARK) { /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing various data values where the condition matched and another vector @@ -4536,6 +4553,70 @@ vect_create_epilog_for_reduction (vec vect_defs, gimple *stmt, gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); scalar_results.safe_push (new_temp); } + else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION + && reduc_code == ERROR_MARK) + { + /* Condition reduction without supported REDUC_MAX_EXPR. 
Generate + idx = 0; + idx_val = induction_index[0]; + val = data_reduc[0]; + for (idx = 0, val = init, i = 0; i < nelts; ++i) + if (induction_index[i] > idx_val) + val = data_reduc[i], idx_val = induction_index[i]; + return val; */ + + tree data_eltype = TREE_TYPE (TREE_TYPE (new_phi_result)); + tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index)); + unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype)); + unsigned HOST_WIDE_INT v_size + = el_size * TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index)); + tree idx_val = NULL_TREE, val = NULL_TREE; + for (unsigned HOST_WIDE_INT off = 0; off < v_size; off += el_size) + { + tree old_idx_val = idx_val; + tree old_val = val; + idx_val = make_ssa_name (idx_eltype); + epilog_stmt = gimple_build_assign (idx_val, BIT_FIELD_REF, + build3 (BIT_FIELD_REF, idx_eltype, + induction_index, + bitsize_int (el_size), + bitsize_int (off))); + gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); + val = make_ssa_name (data_eltype); + epilog_stmt = gimple_build_assign (val, BIT_FIELD_REF, + build3 (BIT_FIELD_REF, + data_eltype, + new_phi_result, + bitsize_int (el_size), + bitsize_int (off))); + gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); + if (off != 0) + { + tree new_idx_val = idx_val; + tree new_val = val; + if (off != v_size - el_size) + { + new_idx_val = make_ssa_name (idx_eltype); + epilog_stmt = gimple_build_assign (new_idx_val, + MAX_EXPR, idx_val, + old_idx_val); + gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); + } + new_val = make_ssa_name (data_eltype); + epilog_stmt = gimple_build_assign (new_val, + COND_EXPR, + build2 (GT_EXPR, + boolean_type_node, + idx_val, + old_idx_val), + val, old_val); + gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); + idx_val = new_idx_val; + val = new_val; + } + } + scalar_results.safe_push (val); + } /* 2.3 Create the reduction code, using one of the three schemes described above. 
In SLP we simply need to extract all the elements from the @@ -4598,6 +4679,10 @@ vect_create_epilog_for_reduction (vec vect_defs, gimple *stmt, int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)); tree vec_temp; + /* COND reductions all do the final reduction with MAX_EXPR. */ + if (code == COND_EXPR) + code = MAX_EXPR; + /* Regardless of whether we have a whole vector shift, if we're emulating the operation via tree-vect-generic, we don't want to use it. Only the first round of the reduction is likely @@ -4763,6 +4848,22 @@ vect_create_epilog_for_reduction (vec vect_defs, gimple *stmt, /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */ scalar_results.safe_push (new_temp); } + + if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) + == INTEGER_INDUC_COND_REDUCTION) + { + /* Earlier we set the initial value to be zero. Check the result + and if it is zero then replace with the original initial + value. */ + tree zero = build_zero_cst (scalar_type); + tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero); + + tree tmp = make_ssa_name (new_scalar_dest); + epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare, + initial_def, new_temp); + gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); + scalar_results[0] = tmp; + } } vect_finalize_reduction: @@ -5639,21 +5740,6 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, epilog_reduc_code = ERROR_MARK; } - - /* When epilog_reduc_code is ERROR_MARK then a reduction will be - generated in the epilog using multiple expressions. This does not - work for condition reductions. 
*/ - if (epilog_reduc_code == ERROR_MARK - && (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) - == INTEGER_INDUC_COND_REDUCTION - || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) - == CONST_COND_REDUCTION)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "no reduc code for scalar code.\n"); - return false; - } } else { @@ -5674,17 +5760,11 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, cr_index_vector_type = build_vector_type (cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype_out)); - epilog_reduc_code = REDUC_MAX_EXPR; optab = optab_for_tree_code (REDUC_MAX_EXPR, cr_index_vector_type, optab_default); if (optab_handler (optab, TYPE_MODE (cr_index_vector_type)) - == CODE_FOR_nothing) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "reduc max op not supported by target.\n"); - return false; - } + != CODE_FOR_nothing) + epilog_reduc_code = REDUC_MAX_EXPR; } if ((double_reduc