From 2ce2720078f3e6448ff26e1c4b2041a2f411aa15 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 27 Nov 2015 08:31:44 +0000 Subject: [PATCH] re PR tree-optimization/68553 (gcc.dg/vect/pr68445.c FAILs) 2015-11-27 Richard Biener PR tree-optimization/68553 * tree-vect-slp.c (vect_get_mask_element): Remove. (vect_transform_slp_perm_load): Implement in a simpler way. * gcc.dg/vect/pr45752.c: Adjust. * gcc.dg/vect/slp-perm-4.c: Likewise. From-SVN: r230993 --- gcc/ChangeLog | 6 + gcc/testsuite/ChangeLog | 6 + gcc/testsuite/gcc.dg/vect/pr45752.c | 22 ++- gcc/testsuite/gcc.dg/vect/slp-perm-4.c | 13 +- gcc/tree-vect-slp.c | 251 ++++++++----------------- 5 files changed, 108 insertions(+), 190 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e48613e2ccd..41ef7446e61 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2015-11-27 Richard Biener + + PR tree-optimization/68553 + * tree-vect-slp.c (vect_get_mask_element): Remove. + (vect_transform_slp_perm_load): Implement in a simpler way. + 2015-11-26 Alexandre Oliva PR rtl-optimization/67753 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 33ef41f751b..57222900c71 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2015-11-27 Richard Biener + + PR tree-optimization/68553 + * gcc.dg/vect/pr45752.c: Adjust. + * gcc.dg/vect/slp-perm-4.c: Likewise. + 2015-11-26 Martin Sebor * g++.dg/init/new45.C (cookie_size): New constant set to a value diff --git a/gcc/testsuite/gcc.dg/vect/pr45752.c b/gcc/testsuite/gcc.dg/vect/pr45752.c index f5173300552..ab95ad64a5b 100644 --- a/gcc/testsuite/gcc.dg/vect/pr45752.c +++ b/gcc/testsuite/gcc.dg/vect/pr45752.c @@ -33,7 +33,7 @@ #define M34 7716 #define M44 16 -#define N 16 +#define N 40 void foo (unsigned int *__restrict__ pInput, unsigned int *__restrict__ pOutput, @@ -75,10 +75,16 @@ void foo (unsigned int *__restrict__ pInput, int main (int argc, const char* argv[]) { unsigned int input[N], output[N], i, input2[N], output2[N]; - unsigned int check_results[N] = {3208, 1334, 28764, 35679, 2789, 13028, - 4754, 168364, 91254, 12399, 22848, 8174, 307964, 146829, 22009, 0}; - unsigned int check_results2[N] = {7136, 2702, 84604, 57909, 6633, 16956, - 6122, 224204, 113484, 16243, 26776, 9542, 363804, 169059, 25853, 0}; + unsigned int check_results[N] + = {3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399, + 22848, 8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404, 31619, + 42488, 15014, 587164, 257979, 41229, 52308, 18434, 726764, 313554, 50839, + 62128, 21854, 866364, 369129, 60449, 71948, 25274, 1005964, 424704, 70059}; + unsigned int check_results2[N] + = {7136, 2702, 84604, 57909, 6633, 16956, 6122, 224204, 113484, 16243, + 26776, 9542, 363804, 169059, 25853, 36596, 12962, 503404, 224634, 35463, + 46416, 16382, 643004, 280209, 45073, 56236, 19802, 782604, 335784, 54683, + 66056, 23222, 922204, 391359, 64293, 75876, 26642, 1061804, 446934, 73903}; check_vect (); @@ -101,7 +107,5 @@ int main (int argc, const char* argv[]) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ -/* { dg-final { scan-tree-dump "permutation requires at least three vectors" "vect" { target vect_perm } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ - +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-4.c b/gcc/testsuite/gcc.dg/vect/slp-perm-4.c index d5d548847d4..8e1b5d41c0d 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-perm-4.c +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-4.c @@ -33,7 +33,7 @@ #define M34 7716 #define M44 16 -#define N 16 +#define N 40 void foo (unsigned int *__restrict__ pInput, unsigned int *__restrict__ pOutput) { @@ -58,7 +58,11 @@ void foo (unsigned int *__restrict__ pInput, unsigned int *__restrict__ pOutput) int main (int argc, const char* argv[]) { unsigned int input[N], output[N], i; - unsigned int check_results[N] = {3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399, 22848, 8174, 307964, 146829, 22009, 0}; + unsigned int check_results[N] + = {3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399, + 22848, 8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404, 31619, + 42488, 15014, 587164, 257979, 41229, 52308, 18434, 726764, 313554, 50839, + 62128, 21854, 866364, 369129, 60449, 71948, 25274, 1005964, 424704, 70059}; check_vect (); @@ -80,7 +84,6 @@ int main (int argc, const char* argv[]) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ -/* { dg-final { scan-tree-dump "permutation requires at least three vectors" "vect" { target vect_perm} } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 320248cff1c..ded361a60e7 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -3241,102 +3241,6 @@ vect_create_mask_and_perm (gimple *stmt, } -/* Given FIRST_MASK_ELEMENT - the mask element in element representation, - return in CURRENT_MASK_ELEMENT its equivalent in target specific - representation. Check that the mask is valid and return FALSE if not. - Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to - the next vector, i.e., the current first vector is not needed. */ - -static bool -vect_get_mask_element (gimple *stmt, int first_mask_element, int m, - int mask_nunits, bool only_one_vec, int index, - unsigned char *mask, int *current_mask_element, - bool *need_next_vector, int *number_of_mask_fixes, - bool *mask_fixed, bool *needs_first_vector) -{ - int i; - - /* Convert to target specific representation. */ - *current_mask_element = first_mask_element + m; - /* Adjust the value in case it's a mask for second and third vectors. */ - *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1); - - if (*current_mask_element < 0) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "permutation requires past vector "); - dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); - dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); - } - return false; - } - - if (*current_mask_element < mask_nunits) - *needs_first_vector = true; - - /* We have only one input vector to permute but the mask accesses values in - the next vector as well. */ - if (only_one_vec && *current_mask_element >= mask_nunits) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "permutation requires at least two vectors "); - dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); - dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); - } - - return false; - } - - /* The mask requires the next vector. */ - while (*current_mask_element >= mask_nunits * 2) - { - if (*needs_first_vector || *mask_fixed) - { - /* We either need the first vector too or have already moved to the - next vector. In both cases, this permutation needs three - vectors. */ - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "permutation requires at " - "least three vectors "); - dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); - dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); - } - - return false; - } - - /* We move to the next vector, dropping the first one and working with - the second and the third - we need to adjust the values of the mask - accordingly. */ - *current_mask_element -= mask_nunits * *number_of_mask_fixes; - - for (i = 0; i < index; i++) - mask[i] -= mask_nunits * *number_of_mask_fixes; - - (*number_of_mask_fixes)++; - *mask_fixed = true; - } - - *need_next_vector = *mask_fixed; - - /* This was the last element of this mask. Start a new one. */ - if (index == mask_nunits - 1) - { - *number_of_mask_fixes = 1; - *mask_fixed = false; - *needs_first_vector = false; - } - - return true; -} - - /* Generate vector permute statements from a list of loads in DR_CHAIN. If ANALYZE_ONLY is TRUE, only check that it is possible to create valid permute statements for the SLP node NODE of the SLP instance @@ -3350,17 +3254,11 @@ vect_transform_slp_perm_load (slp_tree node, vec dr_chain, gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); tree mask_element_type = NULL_TREE, mask_type; - int i, j, k, nunits, vec_index = 0; + int nunits, vec_index = 0; tree vectype = STMT_VINFO_VECTYPE (stmt_info); int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance); - int first_mask_element; - int index, unroll_factor, current_mask_element, ncopies; + int unroll_factor, mask_element, ncopies; unsigned char *mask; - bool only_one_vec = false, need_next_vector = false; - int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter; - int number_of_mask_fixes = 1; - bool mask_fixed = false; - bool needs_first_vector = false; machine_mode mode; if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)) @@ -3391,15 +3289,6 @@ vect_transform_slp_perm_load (slp_tree node, vec dr_chain, mask = XALLOCAVEC (unsigned char, nunits); unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance); - /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE - unrolling factor. */ - orig_vec_stmts_num - = (STMT_VINFO_GROUP_SIZE (stmt_info) - * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) - + nunits - 1) / nunits; - if (orig_vec_stmts_num == 1) - only_one_vec = true; - /* Number of copies is determined by the final vectorization factor relatively to SLP_NODE_INSTANCE unrolling factor. */ ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance); @@ -3422,75 +3311,85 @@ vect_transform_slp_perm_load (slp_tree node, vec dr_chain, we need the second and the third vectors: {b1,c1,a2,b2} and {c2,a3,b3,c3}. */ - { - index = 0; - vect_stmts_counter = 0; - vec_index = 0; - first_vec_index = vec_index++; - if (only_one_vec) - second_vec_index = first_vec_index; - else - second_vec_index = vec_index++; + int vect_stmts_counter = 0; + int index = 0; + int first_vec_index = -1; + int second_vec_index = -1; - for (j = 0; j < unroll_factor; j++) - { - for (k = 0; k < group_size; k++) - { - i = SLP_TREE_LOAD_PERMUTATION (node)[k]; - first_mask_element = i + j * STMT_VINFO_GROUP_SIZE (stmt_info); - if (!vect_get_mask_element (stmt, first_mask_element, 0, - nunits, only_one_vec, index, - mask, ¤t_mask_element, - &need_next_vector, - &number_of_mask_fixes, &mask_fixed, - &needs_first_vector)) - return false; - gcc_assert (current_mask_element >= 0 - && current_mask_element < 2 * nunits); - mask[index++] = current_mask_element; + for (int j = 0; j < unroll_factor; j++) + { + for (int k = 0; k < group_size; k++) + { + int i = (SLP_TREE_LOAD_PERMUTATION (node)[k] + + j * STMT_VINFO_GROUP_SIZE (stmt_info)); + vec_index = i / nunits; + mask_element = i % nunits; + if (vec_index == first_vec_index + || first_vec_index == -1) + { + first_vec_index = vec_index; + } + else if (vec_index == second_vec_index + || second_vec_index == -1) + { + second_vec_index = vec_index; + mask_element += nunits; + } + else + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "permutation requires at " + "least three vectors "); + dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, + stmt, 0); + dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); + } + return false; + } - if (index == nunits) - { - index = 0; - if (!can_vec_perm_p (mode, false, mask)) + gcc_assert (mask_element >= 0 + && mask_element < 2 * nunits); + mask[index++] = mask_element; + + if (index == nunits) + { + if (!can_vec_perm_p (mode, false, mask)) + { + if (dump_enabled_p ()) { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, - vect_location, - "unsupported vect permute { "); - for (i = 0; i < nunits; ++i) - dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", - mask[i]); - dump_printf (MSG_MISSED_OPTIMIZATION, "}\n"); - } - return false; + dump_printf_loc (MSG_MISSED_OPTIMIZATION, + vect_location, + "unsupported vect permute { "); + for (i = 0; i < nunits; ++i) + dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]); + dump_printf (MSG_MISSED_OPTIMIZATION, "}\n"); } + return false; + } - if (!analyze_only) - { - int l; - tree mask_vec, *mask_elts; - mask_elts = XALLOCAVEC (tree, nunits); - for (l = 0; l < nunits; ++l) - mask_elts[l] = build_int_cst (mask_element_type, - mask[l]); - mask_vec = build_vector (mask_type, mask_elts); - - if (need_next_vector) - { - first_vec_index = second_vec_index; - second_vec_index = vec_index; - } + if (!analyze_only) + { + tree mask_vec, *mask_elts; + mask_elts = XALLOCAVEC (tree, nunits); + for (int l = 0; l < nunits; ++l) + mask_elts[l] = build_int_cst (mask_element_type, mask[l]); + mask_vec = build_vector (mask_type, mask_elts); + + if (second_vec_index == -1) + second_vec_index = first_vec_index; + vect_create_mask_and_perm (stmt, mask_vec, first_vec_index, + second_vec_index, + gsi, node, vectype, dr_chain, + ncopies, vect_stmts_counter++); + } - vect_create_mask_and_perm (stmt, - mask_vec, first_vec_index, second_vec_index, - gsi, node, vectype, dr_chain, - ncopies, vect_stmts_counter++); - } - } - } - } + index = 0; + first_vec_index = -1; + second_vec_index = -1; + } + } } return true; -- 2.30.2