From 908a1a166dccefa24ae8b3606f4ce1da944eecb0 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 14 Sep 2017 16:04:32 +0000 Subject: [PATCH] Use vec<> for constant permute masks This patch makes can_vec_perm_p & co. take a vec<>, wrapped in new typedefs vec_perm_indices and auto_vec_perm_indices. There are two reasons for doing this for SVE: (1) it means that the number of elements is bundled with the elements themselves, and is obviously constant. (2) it makes it easier to change the "unsigned char" element type to something wider. Changing the target hook is left as follow-on work. 2017-09-14 Richard Sandiford Alan Hayward David Sherwood gcc/ * target.h (vec_perm_indices): New typedef. (auto_vec_perm_indices): Likewise. * optabs-query.h: Include target.h (can_vec_perm_p): Take a vec_perm_indices *. * optabs-query.c (can_vec_perm_p): Likewise. (can_mult_highpart_p): Update accordingly. Use auto_vec_perm_indices. * tree-ssa-forwprop.c (simplify_vector_constructor): Likewise. * tree-vect-generic.c (lower_vec_perm): Likewise. * tree-vect-data-refs.c (vect_grouped_store_supported): Likewise. (vect_grouped_load_supported): Likewise. (vect_shift_permute_load_chain): Likewise. (vect_permute_store_chain): Use auto_vec_perm_indices. (vect_permute_load_chain): Likewise. * fold-const.c (fold_vec_perm): Take vec_perm_indices. (fold_ternary_loc): Update accordingly. Use auto_vec_perm_indices. Update uses of can_vec_perm_p. * tree-vect-loop.c (calc_vec_perm_mask_for_shift): Replace the mode with a number of elements. Take a vec_perm_indices *. (vect_create_epilog_for_reduction): Update accordingly. Use auto_vec_perm_indices. (have_whole_vector_shift): Likewise. Update call to can_vec_perm_p. * tree-vect-slp.c (vect_build_slp_tree_1): Likewise. (vect_transform_slp_perm_load): Likewise. (vect_schedule_slp_instance): Use auto_vec_perm_indices. * tree-vectorizer.h (vect_gen_perm_mask_any): Take a vec_perm_indices. (vect_gen_perm_mask_checked): Likewise. 
* tree-vect-stmts.c (vect_gen_perm_mask_any): Take a vec_perm_indices. (vect_gen_perm_mask_checked): Likewise. (vectorizable_mask_load_store): Use auto_vec_perm_indices. (vectorizable_store): Likewise. (vectorizable_load): Likewise. (perm_mask_for_reverse): Likewise. Update call to can_vec_perm_p. (vectorizable_bswap): Likewise. Co-Authored-By: Alan Hayward Co-Authored-By: David Sherwood From-SVN: r252761 --- gcc/ChangeLog | 38 ++++++++++++++++++++++++ gcc/fold-const.c | 33 ++++++++++++--------- gcc/optabs-query.c | 19 ++++++------ gcc/optabs-query.h | 3 +- gcc/target.h | 8 +++++ gcc/tree-ssa-forwprop.c | 11 +++---- gcc/tree-vect-data-refs.c | 62 ++++++++++++++++++++++----------------- gcc/tree-vect-generic.c | 8 ++--- gcc/tree-vect-loop.c | 24 ++++++++------- gcc/tree-vect-slp.c | 25 ++++++++-------- gcc/tree-vect-stmts.c | 52 ++++++++++++++++---------------- gcc/tree-vectorizer.h | 4 +-- 12 files changed, 176 insertions(+), 111 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b52eced6e1a..a6c6fcc5cef 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,41 @@ +2017-09-14 Richard Sandiford + Alan Hayward + David Sherwood + + * target.h (vec_perm_indices): New typedef. + (auto_vec_perm_indices): Likewise. + * optabs-query.h: Include target.h + (can_vec_perm_p): Take a vec_perm_indices *. + * optabs-query.c (can_vec_perm_p): Likewise. + (can_mult_highpart_p): Update accordingly. Use auto_vec_perm_indices. + * tree-ssa-forwprop.c (simplify_vector_constructor): Likewise. + * tree-vect-generic.c (lower_vec_perm): Likewise. + * tree-vect-data-refs.c (vect_grouped_store_supported): Likewise. + (vect_grouped_load_supported): Likewise. + (vect_shift_permute_load_chain): Likewise. + (vect_permute_store_chain): Use auto_vec_perm_indices. + (vect_permute_load_chain): Likewise. + * fold-const.c (fold_vec_perm): Take vec_perm_indices. + (fold_ternary_loc): Update accordingly. Use auto_vec_perm_indices. + Update uses of can_vec_perm_p. 
+ * tree-vect-loop.c (calc_vec_perm_mask_for_shift): Replace the + mode with a number of elements. Take a vec_perm_indices *. + (vect_create_epilog_for_reduction): Update accordingly. + Use auto_vec_perm_indices. + (have_whole_vector_shift): Likewise. Update call to can_vec_perm_p. + * tree-vect-slp.c (vect_build_slp_tree_1): Likewise. + (vect_transform_slp_perm_load): Likewise. + (vect_schedule_slp_instance): Use auto_vec_perm_indices. + * tree-vectorizer.h (vect_gen_perm_mask_any): Take a vec_perm_indices. + (vect_gen_perm_mask_checked): Likewise. + * tree-vect-stmts.c (vect_gen_perm_mask_any): Take a vec_perm_indices. + (vect_gen_perm_mask_checked): Likewise. + (vectorizable_mask_load_store): Use auto_vec_perm_indices. + (vectorizable_store): Likewise. + (vectorizable_load): Likewise. + (perm_mask_for_reverse): Likewise. Update call to can_vec_perm_p. + (vectorizable_bswap): Likewise. + 2017-09-14 Richard Sandiford Alan Hayward David Sherwood diff --git a/gcc/fold-const.c b/gcc/fold-const.c index 013081da673..fa9d1bb20ff 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -8786,12 +8786,14 @@ vec_cst_ctor_to_array (tree arg, unsigned int nelts, tree *elts) NULL_TREE otherwise. 
*/ static tree -fold_vec_perm (tree type, tree arg0, tree arg1, const unsigned char *sel) +fold_vec_perm (tree type, tree arg0, tree arg1, vec_perm_indices sel) { - unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i; + unsigned int i; bool need_ctor = false; - gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) == nelts + unsigned int nelts = sel.length (); + gcc_assert (TYPE_VECTOR_SUBPARTS (type) == nelts + && TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) == nelts && TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)) == nelts); if (TREE_TYPE (TREE_TYPE (arg0)) != TREE_TYPE (type) || TREE_TYPE (TREE_TYPE (arg1)) != TREE_TYPE (type)) @@ -11312,15 +11314,15 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, || TREE_CODE (arg2) == CONSTRUCTOR)) { unsigned int nelts = VECTOR_CST_NELTS (arg0), i; - unsigned char *sel = XALLOCAVEC (unsigned char, nelts); gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (type)); + auto_vec_perm_indices sel (nelts); for (i = 0; i < nelts; i++) { tree val = VECTOR_CST_ELT (arg0, i); if (integer_all_onesp (val)) - sel[i] = i; + sel.quick_push (i); else if (integer_zerop (val)) - sel[i] = nelts + i; + sel.quick_push (nelts + i); else /* Currently unreachable. */ return NULL_TREE; } @@ -11643,8 +11645,6 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, if (TREE_CODE (arg2) == VECTOR_CST) { unsigned int nelts = VECTOR_CST_NELTS (arg2), i, mask, mask2; - unsigned char *sel = XALLOCAVEC (unsigned char, 2 * nelts); - unsigned char *sel2 = sel + nelts; bool need_mask_canon = false; bool need_mask_canon2 = false; bool all_in_vec0 = true; @@ -11656,6 +11656,8 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, mask2 = 2 * nelts - 1; mask = single_arg ? 
(nelts - 1) : mask2; gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (type)); + auto_vec_perm_indices sel (nelts); + auto_vec_perm_indices sel2 (nelts); for (i = 0; i < nelts; i++) { tree val = VECTOR_CST_ELT (arg2, i); @@ -11667,16 +11669,19 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, wide_int t = val; need_mask_canon |= wi::gtu_p (t, mask); need_mask_canon2 |= wi::gtu_p (t, mask2); - sel[i] = t.to_uhwi () & mask; - sel2[i] = t.to_uhwi () & mask2; + unsigned int elt = t.to_uhwi () & mask; + unsigned int elt2 = t.to_uhwi () & mask2; - if (sel[i] < nelts) + if (elt < nelts) all_in_vec1 = false; else all_in_vec0 = false; - if ((sel[i] & (nelts-1)) != i) + if ((elt & (nelts - 1)) != i) maybe_identity = false; + + sel.quick_push (elt); + sel2.quick_push (elt2); } if (maybe_identity) @@ -11714,8 +11719,8 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, argument permutation while still allowing an equivalent 2-argument version. */ if (need_mask_canon && arg2 == op2 - && !can_vec_perm_p (TYPE_MODE (type), false, sel) - && can_vec_perm_p (TYPE_MODE (type), false, sel2)) + && !can_vec_perm_p (TYPE_MODE (type), false, &sel) + && can_vec_perm_p (TYPE_MODE (type), false, &sel2)) { need_mask_canon = need_mask_canon2; sel = sel2; diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c index 81b1bd9ca68..ced6f57f4c2 100644 --- a/gcc/optabs-query.c +++ b/gcc/optabs-query.c @@ -353,8 +353,7 @@ can_conditionally_move_p (machine_mode mode) zeroes; this case is not dealt with here. 
*/ bool -can_vec_perm_p (machine_mode mode, bool variable, - const unsigned char *sel) +can_vec_perm_p (machine_mode mode, bool variable, vec_perm_indices *sel) { machine_mode qimode; @@ -368,7 +367,7 @@ can_vec_perm_p (machine_mode mode, bool variable, if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing && (sel == NULL || targetm.vectorize.vec_perm_const_ok == NULL - || targetm.vectorize.vec_perm_const_ok (mode, sel))) + || targetm.vectorize.vec_perm_const_ok (mode, &(*sel)[0]))) return true; } @@ -460,7 +459,6 @@ int can_mult_highpart_p (machine_mode mode, bool uns_p) { optab op; - unsigned char *sel; unsigned i, nunits; op = uns_p ? umul_highpart_optab : smul_highpart_optab; @@ -472,7 +470,6 @@ can_mult_highpart_p (machine_mode mode, bool uns_p) return 0; nunits = GET_MODE_NUNITS (mode); - sel = XALLOCAVEC (unsigned char, nunits); op = uns_p ? vec_widen_umult_even_optab : vec_widen_smult_even_optab; if (optab_handler (op, mode) != CODE_FOR_nothing) @@ -480,9 +477,12 @@ can_mult_highpart_p (machine_mode mode, bool uns_p) op = uns_p ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab; if (optab_handler (op, mode) != CODE_FOR_nothing) { + auto_vec_perm_indices sel (nunits); for (i = 0; i < nunits; ++i) - sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0); - if (can_vec_perm_p (mode, false, sel)) + sel.quick_push (!BYTES_BIG_ENDIAN + + (i & ~1) + + ((i & 1) ? nunits : 0)); + if (can_vec_perm_p (mode, false, &sel)) return 2; } } @@ -493,9 +493,10 @@ can_mult_highpart_p (machine_mode mode, bool uns_p) op = uns_p ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab; if (optab_handler (op, mode) != CODE_FOR_nothing) { + auto_vec_perm_indices sel (nunits); for (i = 0; i < nunits; ++i) - sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1); - if (can_vec_perm_p (mode, false, sel)) + sel.quick_push (2 * i + (BYTES_BIG_ENDIAN ? 
0 : 1)); + if (can_vec_perm_p (mode, false, &sel)) + return 3; } } diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h index 1612fc80a03..9c2d57404f0 100644 --- a/gcc/optabs-query.h +++ b/gcc/optabs-query.h @@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. If not see #define GCC_OPTABS_QUERY_H #include "insn-opinit.h" +#include "target.h" /* Return the insn used to implement mode MODE of OP, or CODE_FOR_nothing if the target does not have such an insn. */ @@ -165,7 +166,7 @@ enum insn_code can_extend_p (machine_mode, machine_mode, int); enum insn_code can_float_p (machine_mode, machine_mode, int); enum insn_code can_fix_p (machine_mode, machine_mode, int, bool *); bool can_conditionally_move_p (machine_mode mode); -bool can_vec_perm_p (machine_mode, bool, const unsigned char *); +bool can_vec_perm_p (machine_mode, bool, vec_perm_indices *); enum insn_code widening_optab_handler (optab, machine_mode, machine_mode); /* Find a widening optab even if it doesn't widen as much as we want. */ #define find_widening_optab_handler(A,B,C,D) \ diff --git a/gcc/target.h b/gcc/target.h index 393de408b97..64e1d68f0e0 100644 --- a/gcc/target.h +++ b/gcc/target.h @@ -191,6 +191,14 @@ enum vect_cost_model_location { vect_epilogue = 2 }; +/* The type to use for vector permutes with a constant permute vector. + Each entry is an index into the concatenated input vectors. */ +typedef vec<unsigned char> vec_perm_indices; + +/* Same, but can be used to construct local permute vectors that are + automatically freed. */ +typedef auto_vec<unsigned char, 32> auto_vec_perm_indices; + /* The target structure. This holds all the backend hooks. 
*/ #define DEFHOOKPOD(NAME, DOC, TYPE, INIT) TYPE NAME; #define DEFHOOK(NAME, DOC, TYPE, PARAMS, INIT) TYPE (* NAME) PARAMS; diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c index 82d940bd36e..11511b4284c 100644 --- a/gcc/tree-ssa-forwprop.c +++ b/gcc/tree-ssa-forwprop.c @@ -1952,7 +1952,6 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) unsigned elem_size, nelts, i; enum tree_code code, conv_code; constructor_elt *elt; - unsigned char *sel; bool maybe_ident; gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR); @@ -1965,7 +1964,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) elem_type = TREE_TYPE (type); elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type)); - sel = XALLOCAVEC (unsigned char, nelts); + auto_vec_perm_indices sel (nelts); orig = NULL; conv_code = ERROR_MARK; maybe_ident = true; @@ -2023,8 +2022,10 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) } if (TREE_INT_CST_LOW (TREE_OPERAND (op1, 1)) != elem_size) return false; - sel[i] = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size; - if (sel[i] != i) maybe_ident = false; + unsigned int elt = TREE_INT_CST_LOW (TREE_OPERAND (op1, 2)) / elem_size; + if (elt != i) + maybe_ident = false; + sel.quick_push (elt); } if (i < nelts) return false; @@ -2053,7 +2054,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) { tree mask_type; - if (!can_vec_perm_p (TYPE_MODE (type), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (type), false, &sel)) return false; mask_type = build_vector_type (build_nonstandard_integer_type (elem_size, 1), diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 070c707fdaf..0b3b968c172 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -4547,7 +4547,8 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) if (VECTOR_MODE_P (mode)) { unsigned int i, nelt = GET_MODE_NUNITS (mode); - unsigned char *sel = XALLOCAVEC (unsigned char, nelt); + 
auto_vec_perm_indices sel (nelt); + sel.quick_grow (nelt); if (count == 3) { @@ -4568,7 +4569,7 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) if (3 * i + nelt2 < nelt) sel[3 * i + nelt2] = 0; } - if (!can_vec_perm_p (mode, false, sel)) + if (!can_vec_perm_p (mode, false, &sel)) { if (dump_enabled_p ()) dump_printf (MSG_MISSED_OPTIMIZATION, @@ -4585,7 +4586,7 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) if (3 * i + nelt2 < nelt) sel[3 * i + nelt2] = nelt + j2++; } - if (!can_vec_perm_p (mode, false, sel)) + if (!can_vec_perm_p (mode, false, &sel)) { if (dump_enabled_p ()) dump_printf (MSG_MISSED_OPTIMIZATION, @@ -4605,13 +4606,13 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) sel[i * 2] = i; sel[i * 2 + 1] = i + nelt; } - if (can_vec_perm_p (mode, false, sel)) - { - for (i = 0; i < nelt; i++) - sel[i] += nelt / 2; - if (can_vec_perm_p (mode, false, sel)) - return true; - } + if (can_vec_perm_p (mode, false, &sel)) + { + for (i = 0; i < nelt; i++) + sel[i] += nelt / 2; + if (can_vec_perm_p (mode, false, &sel)) + return true; + } } } @@ -4710,7 +4711,9 @@ vect_permute_store_chain (vec dr_chain, tree perm3_mask_low, perm3_mask_high; unsigned int i, n, log_length = exact_log2 (length); unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype); - unsigned char *sel = XALLOCAVEC (unsigned char, nelt); + + auto_vec_perm_indices sel (nelt); + sel.quick_grow (nelt); result_chain->quick_grow (length); memcpy (result_chain->address (), dr_chain.address (), @@ -5132,7 +5135,8 @@ vect_grouped_load_supported (tree vectype, bool single_element_p, if (VECTOR_MODE_P (mode)) { unsigned int i, j, nelt = GET_MODE_NUNITS (mode); - unsigned char *sel = XALLOCAVEC (unsigned char, nelt); + auto_vec_perm_indices sel (nelt); + sel.quick_grow (nelt); if (count == 3) { @@ -5144,7 +5148,7 @@ vect_grouped_load_supported (tree vectype, bool single_element_p, sel[i] = 3 * i + k; else sel[i] = 0; - if 
(!can_vec_perm_p (mode, false, sel)) + if (!can_vec_perm_p (mode, false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5157,7 +5161,7 @@ vect_grouped_load_supported (tree vectype, bool single_element_p, sel[i] = i; else sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++); - if (!can_vec_perm_p (mode, false, sel)) + if (!can_vec_perm_p (mode, false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5174,11 +5178,11 @@ vect_grouped_load_supported (tree vectype, bool single_element_p, gcc_assert (pow2p_hwi (count)); for (i = 0; i < nelt; i++) sel[i] = i * 2; - if (can_vec_perm_p (mode, false, sel)) + if (can_vec_perm_p (mode, false, &sel)) { for (i = 0; i < nelt; i++) sel[i] = i * 2 + 1; - if (can_vec_perm_p (mode, false, sel)) + if (can_vec_perm_p (mode, false, &sel)) return true; } } @@ -5292,7 +5296,9 @@ vect_permute_load_chain (vec dr_chain, tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); unsigned int i, j, log_length = exact_log2 (length); unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype); - unsigned char *sel = XALLOCAVEC (unsigned char, nelt); + + auto_vec_perm_indices sel (nelt); + sel.quick_grow (nelt); result_chain->quick_grow (length); memcpy (result_chain->address (), dr_chain.address (), @@ -5486,10 +5492,12 @@ vect_shift_permute_load_chain (vec dr_chain, tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); unsigned int i; unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype); - unsigned char *sel = XALLOCAVEC (unsigned char, nelt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + auto_vec_perm_indices sel (nelt); + sel.quick_grow (nelt); + result_chain->quick_grow (length); memcpy (result_chain->address (), dr_chain.address (), length * sizeof (tree)); @@ -5501,7 +5509,7 @@ vect_shift_permute_load_chain (vec dr_chain, sel[i] = i * 2; for (i = 0; i < nelt / 2; ++i) sel[nelt / 2 + i] = i * 2 + 1; - if 
(!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5515,7 +5523,7 @@ vect_shift_permute_load_chain (vec dr_chain, sel[i] = i * 2 + 1; for (i = 0; i < nelt / 2; ++i) sel[nelt / 2 + i] = i * 2; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5529,7 +5537,7 @@ vect_shift_permute_load_chain (vec dr_chain, For vector length 8 it is {4 5 6 7 8 9 10 11}. */ for (i = 0; i < nelt; i++) sel[i] = nelt / 2 + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5544,7 +5552,7 @@ vect_shift_permute_load_chain (vec dr_chain, sel[i] = i; for (i = nelt / 2; i < nelt; i++) sel[i] = nelt + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5607,7 +5615,7 @@ vect_shift_permute_load_chain (vec dr_chain, sel[i] = 3 * k + (l % 3); k++; } - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5621,7 +5629,7 @@ vect_shift_permute_load_chain (vec dr_chain, For vector length 8 it is {6 7 8 9 10 11 12 13}. 
*/ for (i = 0; i < nelt; i++) sel[i] = 2 * (nelt / 3) + (nelt % 3) + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5634,7 +5642,7 @@ vect_shift_permute_load_chain (vec dr_chain, For vector length 8 it is {5 6 7 8 9 10 11 12}. */ for (i = 0; i < nelt; i++) sel[i] = 2 * (nelt / 3) + 1 + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5647,7 +5655,7 @@ vect_shift_permute_load_chain (vec dr_chain, For vector length 8 it is {3 4 5 6 7 8 9 10}. */ for (i = 0; i < nelt; i++) sel[i] = (nelt / 3) + (nelt % 3) / 2 + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5660,7 +5668,7 @@ vect_shift_permute_load_chain (vec dr_chain, For vector length 8 it is {5 6 7 8 9 10 11 12}. 
*/ for (i = 0; i < nelt; i++) sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i; - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c index 1341d66deed..b114ff09006 100644 --- a/gcc/tree-vect-generic.c +++ b/gcc/tree-vect-generic.c @@ -1300,13 +1300,13 @@ lower_vec_perm (gimple_stmt_iterator *gsi) if (TREE_CODE (mask) == VECTOR_CST) { - unsigned char *sel_int = XALLOCAVEC (unsigned char, elements); + auto_vec_perm_indices sel_int (elements); for (i = 0; i < elements; ++i) - sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i)) - & (2 * elements - 1)); + sel_int.quick_push (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i)) + & (2 * elements - 1)); - if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int)) + if (can_vec_perm_p (TYPE_MODE (vect_type), false, &sel_int)) { gimple_assign_set_rhs3 (stmt, mask); update_stmt (stmt); diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 3b4a71eba89..8135219ea24 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -3698,15 +3698,15 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, } /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET - vector elements (not bits) for a vector of mode MODE. */ + vector elements (not bits) for a vector with NELT elements. 
*/ static void -calc_vec_perm_mask_for_shift (machine_mode mode, unsigned int offset, - unsigned char *sel) +calc_vec_perm_mask_for_shift (unsigned int offset, unsigned int nelt, + vec_perm_indices *sel) { - unsigned int i, nelt = GET_MODE_NUNITS (mode); + unsigned int i; for (i = 0; i < nelt; i++) - sel[i] = (i + offset) & (2*nelt - 1); + sel->quick_push ((i + offset) & (2 * nelt - 1)); } /* Checks whether the target supports whole-vector shifts for vectors of mode @@ -3722,12 +3722,13 @@ have_whole_vector_shift (machine_mode mode) return false; unsigned int i, nelt = GET_MODE_NUNITS (mode); - unsigned char *sel = XALLOCAVEC (unsigned char, nelt); + auto_vec_perm_indices sel (nelt); for (i = nelt/2; i >= 1; i/=2) { - calc_vec_perm_mask_for_shift (mode, i, sel); - if (!can_vec_perm_p (mode, false, sel)) + sel.truncate (0); + calc_vec_perm_mask_for_shift (i, nelt, &sel); + if (!can_vec_perm_p (mode, false, &sel)) return false; } return true; @@ -5059,7 +5060,7 @@ vect_create_epilog_for_reduction (vec vect_defs, gimple *stmt, if (reduce_with_shift && !slp_reduc) { int nelements = vec_size_in_bits / element_bitsize; - unsigned char *sel = XALLOCAVEC (unsigned char, nelements); + auto_vec_perm_indices sel (nelements); int elt_offset; @@ -5083,8 +5084,9 @@ vect_create_epilog_for_reduction (vec vect_defs, gimple *stmt, elt_offset >= 1; elt_offset /= 2) { - calc_vec_perm_mask_for_shift (mode, elt_offset, sel); - tree mask = vect_gen_perm_mask_any (vectype, sel); + sel.truncate (0); + calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel); + tree mask = vect_gen_perm_mask_any (vectype, sel); epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR, new_temp, zero_vec, mask); new_name = make_ssa_name (vec_dest, epilog_stmt); diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 32ca6afa614..32174fe6bb0 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -873,15 +873,16 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, if (alt_stmt_code 
!= ERROR_MARK && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference) { - unsigned char *sel - = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype)); - for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i) + unsigned int count = TYPE_VECTOR_SUBPARTS (vectype); + auto_vec_perm_indices sel (count); + for (i = 0; i < count; ++i) { - sel[i] = i; + unsigned int elt = i; if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code) - sel[i] += TYPE_VECTOR_SUBPARTS (vectype); + elt += count; + sel.quick_push (elt); } - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) { for (i = 0; i < group_size; ++i) if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code) @@ -3486,7 +3487,6 @@ vect_transform_slp_perm_load (slp_tree node, vec dr_chain, tree vectype = STMT_VINFO_VECTYPE (stmt_info); int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance); int mask_element; - unsigned char *mask; machine_mode mode; if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)) @@ -3502,7 +3502,8 @@ vect_transform_slp_perm_load (slp_tree node, vec dr_chain, (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1); mask_type = get_vectype_for_scalar_type (mask_element_type); nunits = TYPE_VECTOR_SUBPARTS (vectype); - mask = XALLOCAVEC (unsigned char, nunits); + auto_vec_perm_indices mask (nunits); + mask.quick_grow (nunits); /* Initialize the vect stmts of NODE to properly insert the generated stmts later. */ @@ -3577,7 +3578,7 @@ vect_transform_slp_perm_load (slp_tree node, vec dr_chain, if (index == nunits) { if (! noop_p - && ! can_vec_perm_p (mode, false, mask)) + && ! 
can_vec_perm_p (mode, false, &mask)) { if (dump_enabled_p ()) { @@ -3730,15 +3731,15 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance, enum tree_code code0 = gimple_assign_rhs_code (stmt); enum tree_code ocode = ERROR_MARK; gimple *ostmt; - unsigned char *mask = XALLOCAVEC (unsigned char, group_size); + auto_vec_perm_indices mask (group_size); FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt) if (gimple_assign_rhs_code (ostmt) != code0) { - mask[i] = 1; + mask.quick_push (1); ocode = gimple_assign_rhs_code (ostmt); } else - mask[i] = 0; + mask.quick_push (0); if (ocode != ERROR_MARK) { vec v0; diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index b5f706c1f31..0cee0d48792 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -1706,15 +1706,14 @@ static tree perm_mask_for_reverse (tree vectype) { int i, nunits; - unsigned char *sel; nunits = TYPE_VECTOR_SUBPARTS (vectype); - sel = XALLOCAVEC (unsigned char, nunits); + auto_vec_perm_indices sel (nunits); for (i = 0; i < nunits; ++i) - sel[i] = nunits - 1 - i; + sel.quick_push (nunits - 1 - i); - if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel)) + if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) return NULL_TREE; return vect_gen_perm_mask_checked (vectype, sel); } @@ -2171,19 +2170,20 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi, modifier = NONE; else if (nunits == gather_off_nunits / 2) { - unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits); modifier = WIDEN; + auto_vec_perm_indices sel (gather_off_nunits); for (i = 0; i < gather_off_nunits; ++i) - sel[i] = i | nunits; + sel.quick_push (i | nunits); perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel); } else if (nunits == gather_off_nunits * 2) { - unsigned char *sel = XALLOCAVEC (unsigned char, nunits); modifier = NARROW; + auto_vec_perm_indices sel (nunits); + sel.quick_grow (nunits); for (i = 0; i < nunits; ++i) sel[i] = i < gather_off_nunits ? 
i : i + nunits - gather_off_nunits; @@ -2481,14 +2481,14 @@ vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi, return false; unsigned int num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype); - unsigned char *elts = XALLOCAVEC (unsigned char, num_bytes); - unsigned char *elt = elts; unsigned word_bytes = num_bytes / nunits; + + auto_vec_perm_indices elts (num_bytes); for (unsigned i = 0; i < nunits; ++i) for (unsigned j = 0; j < word_bytes; ++j) - *elt++ = (i + 1) * word_bytes - j - 1; + elts.quick_push ((i + 1) * word_bytes - j - 1); - if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, elts)) + if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, &elts)) return false; if (! vec_stmt) @@ -5803,22 +5803,22 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, modifier = NONE; else if (nunits == (unsigned int) scatter_off_nunits / 2) { - unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits); modifier = WIDEN; + auto_vec_perm_indices sel (scatter_off_nunits); for (i = 0; i < (unsigned int) scatter_off_nunits; ++i) - sel[i] = i | nunits; + sel.quick_push (i | nunits); perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel); gcc_assert (perm_mask != NULL_TREE); } else if (nunits == (unsigned int) scatter_off_nunits * 2) { - unsigned char *sel = XALLOCAVEC (unsigned char, nunits); modifier = NARROW; + auto_vec_perm_indices sel (nunits); for (i = 0; i < (unsigned int) nunits; ++i) - sel[i] = i | scatter_off_nunits; + sel.quick_push (i | scatter_off_nunits); perm_mask = vect_gen_perm_mask_checked (vectype, sel); gcc_assert (perm_mask != NULL_TREE); @@ -6503,19 +6503,19 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, vect_gen_perm_mask_checked. 
*/ tree -vect_gen_perm_mask_any (tree vectype, const unsigned char *sel) +vect_gen_perm_mask_any (tree vectype, vec_perm_indices sel) { tree mask_elt_type, mask_type, mask_vec; - int i, nunits; - nunits = TYPE_VECTOR_SUBPARTS (vectype); + unsigned int nunits = sel.length (); + gcc_checking_assert (nunits == TYPE_VECTOR_SUBPARTS (vectype)); mask_elt_type = lang_hooks.types.type_for_mode (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1); mask_type = get_vectype_for_scalar_type (mask_elt_type); auto_vec mask_elts (nunits); - for (i = 0; i < nunits; ++i) + for (unsigned int i = 0; i < nunits; ++i) mask_elts.quick_push (build_int_cst (mask_elt_type, sel[i])); mask_vec = build_vector (mask_type, mask_elts); @@ -6526,9 +6526,9 @@ vect_gen_perm_mask_any (tree vectype, const unsigned char *sel) i.e. that the target supports the pattern _for arbitrary input vectors_. */ tree -vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel) +vect_gen_perm_mask_checked (tree vectype, vec_perm_indices sel) { - gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel)); + gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, &sel)); return vect_gen_perm_mask_any (vectype, sel); } @@ -6841,22 +6841,22 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, modifier = NONE; else if (nunits == gather_off_nunits / 2) { - unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits); modifier = WIDEN; + auto_vec_perm_indices sel (gather_off_nunits); for (i = 0; i < gather_off_nunits; ++i) - sel[i] = i | nunits; + sel.quick_push (i | nunits); perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel); } else if (nunits == gather_off_nunits * 2) { - unsigned char *sel = XALLOCAVEC (unsigned char, nunits); modifier = NARROW; + auto_vec_perm_indices sel (nunits); for (i = 0; i < nunits; ++i) - sel[i] = i < gather_off_nunits - ? i : i + nunits - gather_off_nunits; + sel.quick_push (i < gather_off_nunits + ? 
i : i + nunits - gather_off_nunits); perm_mask = vect_gen_perm_mask_checked (vectype, sel); ncopies *= 2; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 4ee3c3fbcd7..7ed00782148 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1151,8 +1151,8 @@ extern void vect_get_load_cost (struct data_reference *, int, bool, extern void vect_get_store_cost (struct data_reference *, int, unsigned int *, stmt_vector_for_cost *); extern bool vect_supportable_shift (enum tree_code, tree); -extern tree vect_gen_perm_mask_any (tree, const unsigned char *); -extern tree vect_gen_perm_mask_checked (tree, const unsigned char *); +extern tree vect_gen_perm_mask_any (tree, vec_perm_indices); +extern tree vect_gen_perm_mask_checked (tree, vec_perm_indices); extern void optimize_mask_stores (struct loop*); /* In tree-vect-data-refs.c. */ -- 2.30.2