From 48b63a31f8c02f262579432fd65f61abfefe4980 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 20 Jun 2019 09:06:08 +0200 Subject: [PATCH] tree-vect-stmts.c (enum scan_store_kind): New type. * tree-vect-stmts.c (enum scan_store_kind): New type. (scan_store_can_perm_p): Change last argument from int * to vec<enum scan_store_kind> *, record precisely which permutations need whole vector left shift or that plus VEC_COND_EXPR. (vectorizable_scan_store): Adjust caller, use whole vector left shift and additional VEC_COND_EXPR only for those iterations that need it. From-SVN: r272497 --- gcc/ChangeLog | 9 ++++ gcc/tree-vect-stmts.c | 116 +++++++++++++++++++++++++++--------------- 2 files changed, 83 insertions(+), 42 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e69a0fc410a..df803bbf2a0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2019-06-20 Jakub Jelinek + + * tree-vect-stmts.c (enum scan_store_kind): New type. + (scan_store_can_perm_p): Change last argument from int * to + vec<enum scan_store_kind> *, record precisely which permutations + need whole vector left shift or that plus VEC_COND_EXPR. + (vectorizable_scan_store): Adjust caller, use whole vector left shift + and additional VEC_COND_EXPR only for those iterations that need it. + 2019-06-20 Alexandre Oliva * config.gcc: Fix ARM --with-fpu checking and error message. diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 98a5f3e04dd..e87c32df869 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -6354,13 +6354,27 @@ scan_operand_equal_p (tree ref1, tree ref2) } +enum scan_store_kind { + /* Normal permutation. */ + scan_store_kind_perm, + + /* Whole vector left shift permutation with zero init. */ + scan_store_kind_lshift_zero, + + /* Whole vector left shift permutation and VEC_COND_EXPR. */ + scan_store_kind_lshift_cond +}; + /* Function check_scan_store. Verify if we can perform the needed permutations or whole vector shifts. 
- Return -1 on failure, otherwise exact log2 of vectype's nunits. */ + Return -1 on failure, otherwise exact log2 of vectype's nunits. + USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation + to do at each step. */ static int -scan_store_can_perm_p (tree vectype, tree init, int *use_whole_vector_p = NULL) +scan_store_can_perm_p (tree vectype, tree init, + vec<enum scan_store_kind> *use_whole_vector = NULL) { enum machine_mode vec_mode = TYPE_MODE (vectype); unsigned HOST_WIDE_INT nunits; @@ -6371,50 +6385,59 @@ scan_store_can_perm_p (tree vectype, tree init, int *use_whole_vector_p = NULL) return -1; int i; + enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm; for (i = 0; i <= units_log2; ++i) { unsigned HOST_WIDE_INT j, k; + enum scan_store_kind kind = scan_store_kind_perm; vec_perm_builder sel (nunits, nunits, 1); sel.quick_grow (nunits); - if (i == 0) + if (i == units_log2) { for (j = 0; j < nunits; ++j) sel[j] = nunits - 1; } else { - for (j = 0; j < (HOST_WIDE_INT_1U << (i - 1)); ++j) + for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j) sel[j] = j; for (k = 0; j < nunits; ++j, ++k) sel[j] = nunits + k; } - vec_perm_indices indices (sel, i == 0 ? 1 : 2, nunits); + vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits); if (!can_vec_perm_const_p (vec_mode, indices)) - break; - } - - if (i == 0) - return -1; - - if (i <= units_log2) - { - if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing) - return -1; - int kind = 1; - /* Whole vector shifts shift in zeros, so if init is all zero constant, - there is no need to do anything further. 
*/ - if ((TREE_CODE (init) != INTEGER_CST - && TREE_CODE (init) != REAL_CST) - || !initializer_zerop (init)) { - tree masktype = build_same_sized_truth_vector_type (vectype); - if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST)) + if (i == units_log2) return -1; - kind = 2; + + if (whole_vector_shift_kind == scan_store_kind_perm) + { + if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing) + return -1; + whole_vector_shift_kind = scan_store_kind_lshift_zero; + /* Whole vector shifts shift in zeros, so if init is all zero + constant, there is no need to do anything further. */ + if ((TREE_CODE (init) != INTEGER_CST + && TREE_CODE (init) != REAL_CST) + || !initializer_zerop (init)) + { + tree masktype = build_same_sized_truth_vector_type (vectype); + if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST)) + return -1; + whole_vector_shift_kind = scan_store_kind_lshift_cond; + } + } + kind = whole_vector_shift_kind; + } + if (use_whole_vector) + { + if (kind != scan_store_kind_perm && use_whole_vector->is_empty ()) + use_whole_vector->safe_grow_cleared (i); + if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ()) + use_whole_vector->safe_push (kind); } - if (use_whole_vector_p) - *use_whole_vector_p = kind; } + return units_log2; } @@ -6726,11 +6749,12 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, unsigned HOST_WIDE_INT nunits; if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)) gcc_unreachable (); - int use_whole_vector_p = 0; - int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector_p); + auto_vec<enum scan_store_kind, 16> use_whole_vector; + int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector); gcc_assert (units_log2 > 0); auto_vec<tree, 16> perms; perms.quick_grow (units_log2 + 1); + tree zero_vec = NULL_TREE, masktype = NULL_TREE; for (int i = 0; i <= units_log2; ++i) { unsigned HOST_WIDE_INT j, k; @@ -6739,23 +6763,28 @@ vectorizable_scan_store (stmt_vec_info stmt_info, 
gimple_stmt_iterator *gsi, if (i == units_log2) for (j = 0; j < nunits; ++j) sel[j] = nunits - 1; - else - { - for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j) - sel[j] = j; - for (k = 0; j < nunits; ++j, ++k) - sel[j] = nunits + k; - } + else + { + for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j) + sel[j] = j; + for (k = 0; j < nunits; ++j, ++k) + sel[j] = nunits + k; + } vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits); - if (use_whole_vector_p && i < units_log2) - perms[i] = vect_gen_perm_mask_any (vectype, indices); + if (!use_whole_vector.is_empty () + && use_whole_vector[i] != scan_store_kind_perm) + { + if (zero_vec == NULL_TREE) + zero_vec = build_zero_cst (vectype); + if (masktype == NULL_TREE + && use_whole_vector[i] == scan_store_kind_lshift_cond) + masktype = build_same_sized_truth_vector_type (vectype); + perms[i] = vect_gen_perm_mask_any (vectype, indices); + } else perms[i] = vect_gen_perm_mask_checked (vectype, indices); } - tree zero_vec = use_whole_vector_p ? build_zero_cst (vectype) : NULL_TREE; - tree masktype = (use_whole_vector_p == 2 - ? build_same_sized_truth_vector_type (vectype) : NULL_TREE); stmt_vec_info prev_stmt_info = NULL; tree vec_oprnd1 = NULL_TREE; tree vec_oprnd2 = NULL_TREE; @@ -6788,7 +6817,10 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, { tree new_temp = make_ssa_name (vectype); gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR, - zero_vec ? zero_vec : vec_oprnd1, v, + (zero_vec + && (use_whole_vector[i] + != scan_store_kind_perm)) + ? 
zero_vec : vec_oprnd1, v, perms[i]); new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); if (prev_stmt_info == NULL) @@ -6797,7 +6829,7 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; prev_stmt_info = new_stmt_info; - if (use_whole_vector_p == 2) + if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond) { /* Whole vector shift shifted in zero bits, but if *init is not initializer_zerop, we need to replace those elements -- 2.30.2